/*
 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/upipe/nx_user_pipe.h>

#define NX_UPIPE_RINGSIZE	128	/* default ring size */
#define NX_UPIPE_MAXRINGS	NX_MAX_NUM_RING_PAIR
#define NX_UPIPE_MINSLOTS	2	/* XXX same as above */
#define NX_UPIPE_MAXSLOTS	4096	/* XXX same as above */
#define NX_UPIPE_BUFSIZE	(2 * 1024)
#define NX_UPIPE_MINBUFSIZE	1024
#define NX_UPIPE_MAXBUFSIZE	(16 * 1024)
#define NX_UPIPE_MHINTS		NEXUS_MHINTS_NORMAL

static int nx_upipe_na_alloc(struct nexus_adapter *, uint32_t);
static struct nexus_upipe_adapter *nx_upipe_find(struct nexus_adapter *,
    uint32_t);
static int nx_upipe_na_add(struct nexus_adapter *,
    struct nexus_upipe_adapter *);
static void nx_upipe_na_remove(struct nexus_adapter *,
    struct nexus_upipe_adapter *);
static int nx_upipe_na_txsync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int nx_upipe_na_txsync_locked(struct __kern_channel_ring *,
    struct proc *, uint32_t, int *, boolean_t);
static int nx_upipe_na_rxsync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int nx_upipe_na_krings_create(struct nexus_adapter *,
    struct kern_channel *);
static int nx_upipe_na_activate(struct nexus_adapter *, na_activate_mode_t);
static void nx_upipe_na_krings_delete(struct nexus_adapter *,
    struct kern_channel *, boolean_t);
static void nx_upipe_na_dtor(struct nexus_adapter *);

static void nx_upipe_dom_init(struct nxdom *);
static void nx_upipe_dom_terminate(struct nxdom *);
static void nx_upipe_dom_fini(struct nxdom *);
static int nx_upipe_dom_bind_port(struct kern_nexus *, nexus_port_t *,
    struct nxbind *, void *);
static int nx_upipe_dom_unbind_port(struct kern_nexus *, nexus_port_t);
static int nx_upipe_dom_connect(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *, struct chreq *,
    struct kern_channel *, struct nxbind *, struct proc *);
static void nx_upipe_dom_disconnect(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *);
static void nx_upipe_dom_defunct(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *, struct proc *);
static void nx_upipe_dom_defunct_finalize(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *, boolean_t);

static int nx_upipe_prov_init(struct kern_nexus_domain_provider *);
static int nx_upipe_prov_params_adjust(
    const struct kern_nexus_domain_provider *, const struct nxprov_params *,
    struct nxprov_adjusted_params *);
static int nx_upipe_prov_params(struct kern_nexus_domain_provider *,
    const uint32_t, const struct nxprov_params *, struct nxprov_params *,
    struct skmem_region_params[SKMEM_REGIONS]);
static int nx_upipe_prov_mem_new(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct nexus_adapter *);
static void nx_upipe_prov_fini(struct kern_nexus_domain_provider *);
static int nx_upipe_prov_nx_ctor(struct kern_nexus *);
static void nx_upipe_prov_nx_dtor(struct kern_nexus *);

static struct nexus_upipe_adapter *na_upipe_alloc(zalloc_flags_t);
static void na_upipe_free(struct nexus_adapter *);

static struct nx_upipe *nx_upipe_alloc(zalloc_flags_t);
static void nx_upipe_free(struct nx_upipe *);

#if (DEVELOPMENT || DEBUG)
static uint32_t nx_upipe_mhints = 0;
SYSCTL_NODE(_kern_skywalk, OID_AUTO, upipe, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Skywalk upipe tuning");
SYSCTL_UINT(_kern_skywalk_upipe, OID_AUTO, nx_mhints,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nx_upipe_mhints, 0,
    "upipe nexus memory usage hints");
#endif /* (DEVELOPMENT || DEBUG) */
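
/*
 * Illustrative usage (not from the original source): on a DEVELOPMENT or
 * DEBUG kernel the override above is published as
 * "kern.skywalk.upipe.nx_mhints", so something like
 *
 *	sysctl kern.skywalk.upipe.nx_mhints=2
 *
 * would force every subsequently-created user pipe nexus to use that
 * memory-usage hint (see nx_upipe_prov_params() below, which applies the
 * override only when the value is non-zero). The value 2 here is purely
 * an example; the meaningful bit values are the NEXUS_MHINTS_* flags.
 */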

struct nxdom nx_upipe_dom_s = {
	.nxdom_prov_head =
	    STAILQ_HEAD_INITIALIZER(nx_upipe_dom_s.nxdom_prov_head),
	.nxdom_type = NEXUS_TYPE_USER_PIPE,
	.nxdom_md_type = NEXUS_META_TYPE_QUANTUM,
	.nxdom_md_subtype = NEXUS_META_SUBTYPE_PAYLOAD,
	.nxdom_name = "upipe",
	.nxdom_ports = {
		.nb_def = 2,
		.nb_min = 2,
		.nb_max = 2,
	},
	.nxdom_tx_rings = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = NX_UPIPE_MAXRINGS,
	},
	.nxdom_rx_rings = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = NX_UPIPE_MAXRINGS,
	},
	.nxdom_tx_slots = {
		.nb_def = NX_UPIPE_RINGSIZE,
		.nb_min = NX_UPIPE_MINSLOTS,
		.nb_max = NX_UPIPE_MAXSLOTS,
	},
	.nxdom_rx_slots = {
		.nb_def = NX_UPIPE_RINGSIZE,
		.nb_min = NX_UPIPE_MINSLOTS,
		.nb_max = NX_UPIPE_MAXSLOTS,
	},
	.nxdom_buf_size = {
		.nb_def = NX_UPIPE_BUFSIZE,
		.nb_min = NX_UPIPE_MINBUFSIZE,
		.nb_max = NX_UPIPE_MAXBUFSIZE,
	},
	.nxdom_meta_size = {
		.nb_def = NX_METADATA_OBJ_MIN_SZ,
		.nb_min = NX_METADATA_OBJ_MIN_SZ,
		.nb_max = NX_METADATA_USR_MAX_SZ,
	},
	.nxdom_stats_size = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_STATS_MAX_SZ,
	},
	.nxdom_pipes = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_UPIPE_MAXPIPES,
	},
	.nxdom_mhints = {
		.nb_def = NX_UPIPE_MHINTS,
		.nb_min = NEXUS_MHINTS_NORMAL,
		.nb_max = (NEXUS_MHINTS_NORMAL | NEXUS_MHINTS_WILLNEED |
		    NEXUS_MHINTS_LOWLATENCY | NEXUS_MHINTS_HIUSE),
	},
	.nxdom_flowadv_max = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_FLOWADV_MAX,
	},
	.nxdom_nexusadv_size = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_NEXUSADV_MAX_SZ,
	},
	.nxdom_capabilities = {
		.nb_def = NXPCAP_USER_CHANNEL,
		.nb_min = NXPCAP_USER_CHANNEL,
		.nb_max = NXPCAP_USER_CHANNEL,
	},
	.nxdom_qmap = {
		.nb_def = NEXUS_QMAP_TYPE_INVALID,
		.nb_min = NEXUS_QMAP_TYPE_INVALID,
		.nb_max = NEXUS_QMAP_TYPE_INVALID,
	},
	.nxdom_max_frags = {
		.nb_def = NX_PBUF_FRAGS_DEFAULT,
		.nb_min = NX_PBUF_FRAGS_MIN,
		.nb_max = NX_PBUF_FRAGS_DEFAULT,
	},
	.nxdom_init = nx_upipe_dom_init,
	.nxdom_terminate = nx_upipe_dom_terminate,
	.nxdom_fini = nx_upipe_dom_fini,
	.nxdom_find_port = NULL,
	.nxdom_port_is_reserved = NULL,
	.nxdom_bind_port = nx_upipe_dom_bind_port,
	.nxdom_unbind_port = nx_upipe_dom_unbind_port,
	.nxdom_connect = nx_upipe_dom_connect,
	.nxdom_disconnect = nx_upipe_dom_disconnect,
	.nxdom_defunct = nx_upipe_dom_defunct,
	.nxdom_defunct_finalize = nx_upipe_dom_defunct_finalize,
};

static struct kern_nexus_domain_provider nx_upipe_prov_s = {
	.nxdom_prov_name = NEXUS_PROVIDER_USER_PIPE,
	.nxdom_prov_flags = NXDOMPROVF_DEFAULT,
	.nxdom_prov_cb = {
		.dp_cb_init = nx_upipe_prov_init,
		.dp_cb_fini = nx_upipe_prov_fini,
		.dp_cb_params = nx_upipe_prov_params,
		.dp_cb_mem_new = nx_upipe_prov_mem_new,
		.dp_cb_config = NULL,
		.dp_cb_nx_ctor = nx_upipe_prov_nx_ctor,
		.dp_cb_nx_dtor = nx_upipe_prov_nx_dtor,
		.dp_cb_nx_mem_info = NULL,
		.dp_cb_nx_mib_get = NULL,
		.dp_cb_nx_stop = NULL,
	},
};

static ZONE_DECLARE(na_upipe_zone, SKMEM_ZONE_PREFIX ".na.upipe",
    sizeof(struct nexus_upipe_adapter), ZC_ZFREE_CLEARMEM);

static ZONE_DECLARE(nx_upipe_zone, SKMEM_ZONE_PREFIX ".nx.upipe",
    sizeof(struct nx_upipe), ZC_ZFREE_CLEARMEM);

#define SKMEM_TAG_PIPES "com.apple.skywalk.pipes"
static kern_allocation_name_t skmem_tag_pipes;

static void
nx_upipe_dom_init(struct nxdom *nxdom)
{
	SK_LOCK_ASSERT_HELD();
	ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED));

	ASSERT(skmem_tag_pipes == NULL);
	skmem_tag_pipes = kern_allocation_name_allocate(SKMEM_TAG_PIPES, 0);
	ASSERT(skmem_tag_pipes != NULL);

	(void) nxdom_prov_add(nxdom, &nx_upipe_prov_s);
}

static void
nx_upipe_dom_terminate(struct nxdom *nxdom)
{
	struct kern_nexus_domain_provider *nxdom_prov, *tnxdp;

	STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head,
	    nxdom_prov_link, tnxdp) {
		(void) nxdom_prov_del(nxdom_prov);
	}

	if (skmem_tag_pipes != NULL) {
		kern_allocation_name_release(skmem_tag_pipes);
		skmem_tag_pipes = NULL;
	}
}

static void
nx_upipe_dom_fini(struct nxdom *nxdom)
{
#pragma unused(nxdom)
}

static int
nx_upipe_prov_init(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
	SK_D("initializing %s", nxdom_prov->nxdom_prov_name);
	return 0;
}

static int
nx_upipe_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov,
    const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj)
{
#pragma unused(nxdom_prov, nxp)
	/*
	 * User pipe requires twice the number of rings.
	 * The ring counts must also be symmetrical.
	 */
	if (*(adj->adj_tx_rings) != *(adj->adj_rx_rings)) {
		SK_ERR("rings: tx (%u) != rx (%u)", *(adj->adj_tx_rings),
		    *(adj->adj_rx_rings));
		return EINVAL;
	}

	*(adj->adj_tx_rings) *= 2;
	*(adj->adj_rx_rings) *= 2;

	if (adj->adj_buf_srp->srp_r_seg_size == 0) {
		adj->adj_buf_srp->srp_r_seg_size = skmem_usr_buf_seg_size;
	}

	/* enable magazines layer for metadata */
	*(adj->adj_md_magazines) = TRUE;

	return 0;
}
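
/*
 * Worked example of the adjustment above (illustrative numbers): a provider
 * configured with the domain defaults of 1 TX and 1 RX ring passes the
 * symmetry check and is adjusted to 2 TX and 2 RX rings -- one TX/RX pair
 * backing each of the two pipe endpoints. An asymmetric request such as
 * 2 TX / 1 RX is rejected with EINVAL before any doubling takes place.
 */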

static int
nx_upipe_prov_params(struct kern_nexus_domain_provider *nxdom_prov,
    const uint32_t req, const struct nxprov_params *nxp0,
    struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS])
{
	struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom;
	int err;

	err = nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp,
	    nxdom, nxdom, nxdom, nx_upipe_prov_params_adjust);
#if (DEVELOPMENT || DEBUG)
	/* sysctl override */
	if ((err == 0) && (nx_upipe_mhints != 0)) {
		nxp->nxp_mhints = nx_upipe_mhints;
	}
#endif /* (DEVELOPMENT || DEBUG) */
	return err;
}

static int
nx_upipe_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct nexus_adapter *na)
{
#pragma unused(nxdom_prov)
	int err = 0;

	SK_DF(SK_VERB_USER_PIPE,
	    "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx),
	    NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name,
	    SK_KVA(na));

	ASSERT(na->na_arena == NULL);
	ASSERT(NX_USER_CHANNEL_PROV(nx));
	/*
	 * The underlying nexus adapters already share the same memory
	 * allocator, and thus we don't care about storing the pp in
	 * the nexus.
	 *
	 * This means that clients calling kern_nexus_get_pbufpool()
	 * will get NULL, but this is fine since we don't expose the
	 * user pipe to external kernel clients.
	 */
	na->na_arena = skmem_arena_create_for_nexus(na,
	    NX_PROV(nx)->nxprov_region_params, NULL, NULL, FALSE,
	    FALSE, NULL, &err);
	ASSERT(na->na_arena != NULL || err != 0);

	return err;
}

static void
nx_upipe_prov_fini(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
	SK_D("destroying %s", nxdom_prov->nxdom_prov_name);
}

static int
nx_upipe_prov_nx_ctor(struct kern_nexus *nx)
{
	SK_LOCK_ASSERT_HELD();
	ASSERT(nx->nx_arg == NULL);

	SK_D("nexus 0x%llx (%s)", SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name);

	nx->nx_arg = nx_upipe_alloc(Z_WAITOK);
	SK_D("create new upipe 0x%llx for nexus 0x%llx",
	    SK_KVA(NX_UPIPE_PRIVATE(nx)), SK_KVA(nx));

	return 0;
}

static void
nx_upipe_prov_nx_dtor(struct kern_nexus *nx)
{
	struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);

	SK_LOCK_ASSERT_HELD();

	SK_D("nexus 0x%llx (%s) upipe 0x%llx", SK_KVA(nx),
	    NX_DOM_PROV(nx)->nxdom_prov_name, SK_KVA(u));

	if (u->nup_cli_nxb != NULL) {
		nxb_free(u->nup_cli_nxb);
		u->nup_cli_nxb = NULL;
	}
	if (u->nup_srv_nxb != NULL) {
		nxb_free(u->nup_srv_nxb);
		u->nup_srv_nxb = NULL;
	}

	SK_DF(SK_VERB_USER_PIPE, "marking upipe 0x%llx as free", SK_KVA(u));
	nx_upipe_free(u);
	nx->nx_arg = NULL;
}

static struct nexus_upipe_adapter *
na_upipe_alloc(zalloc_flags_t how)
{
	struct nexus_upipe_adapter *pna;

	_CASSERT(offsetof(struct nexus_upipe_adapter, pna_up) == 0);

	pna = zalloc_flags(na_upipe_zone, how | Z_ZERO);
	if (pna) {
		pna->pna_up.na_type = NA_USER_PIPE;
		pna->pna_up.na_free = na_upipe_free;
	}
	return pna;
}

static void
na_upipe_free(struct nexus_adapter *na)
{
	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;

	ASSERT(pna->pna_up.na_refcount == 0);
	SK_DF(SK_VERB_MEM, "pna 0x%llx FREE", SK_KVA(pna));
	bzero(pna, sizeof(*pna));
	zfree(na_upipe_zone, pna);
}

static int
nx_upipe_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port,
    struct nxbind *nxb0, void *info)
{
#pragma unused(info)
	struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
	struct nxbind *nxb = NULL;
	int error = 0;

	ASSERT(nx_port != NULL);
	ASSERT(nxb0 != NULL);

	switch (*nx_port) {
	case NEXUS_PORT_USER_PIPE_CLIENT:
	case NEXUS_PORT_USER_PIPE_SERVER:
		if ((*nx_port == NEXUS_PORT_USER_PIPE_CLIENT &&
		    u->nup_cli_nxb != NULL) ||
		    (*nx_port == NEXUS_PORT_USER_PIPE_SERVER &&
		    u->nup_srv_nxb != NULL)) {
			error = EEXIST;
			break;
		}

		nxb = nxb_alloc(Z_WAITOK);
		nxb_move(nxb0, nxb);
		if (*nx_port == NEXUS_PORT_USER_PIPE_CLIENT) {
			u->nup_cli_nxb = nxb;
		} else {
			u->nup_srv_nxb = nxb;
		}

		ASSERT(error == 0);
		break;

	default:
		error = EDOM;
		break;
	}

	return error;
}

static int
nx_upipe_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
	struct nxbind *nxb = NULL;
	int error = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);

	switch (nx_port) {
	case NEXUS_PORT_USER_PIPE_CLIENT:
	case NEXUS_PORT_USER_PIPE_SERVER:
		if ((nx_port == NEXUS_PORT_USER_PIPE_CLIENT &&
		    u->nup_cli_nxb == NULL) ||
		    (nx_port == NEXUS_PORT_USER_PIPE_SERVER &&
		    u->nup_srv_nxb == NULL)) {
			error = ENOENT;
			break;
		}

		if (nx_port == NEXUS_PORT_USER_PIPE_CLIENT) {
			nxb = u->nup_cli_nxb;
			u->nup_cli_nxb = NULL;
		} else {
			nxb = u->nup_srv_nxb;
			u->nup_srv_nxb = NULL;
		}
		nxb_free(nxb);
		ASSERT(error == 0);
		break;

	default:
		error = EDOM;
		break;
	}

	return error;
}

static int
nx_upipe_dom_connect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
{
#pragma unused(nxdom_prov)
	nexus_port_t port = chr->cr_port;
	int err = 0;

	SK_LOCK_ASSERT_HELD();

	ASSERT(NX_DOM_PROV(nx) == nxdom_prov);
	ASSERT(nx->nx_prov->nxprov_params->nxp_type ==
	    nxdom_prov->nxdom_prov_dom->nxdom_type &&
	    nx->nx_prov->nxprov_params->nxp_type == NEXUS_TYPE_USER_PIPE);

	/*
	 * XXX: channel in user packet pool mode is not supported for
	 * user-pipe for now.
	 */
	if (chr->cr_mode & CHMODE_USER_PACKET_POOL) {
		SK_ERR("User packet pool mode not supported for upipe");
		err = ENOTSUP;
		goto done;
	}

	if (chr->cr_mode & CHMODE_EVENT_RING) {
		SK_ERR("event ring is not supported for upipe");
		err = ENOTSUP;
		goto done;
	}

	if (chr->cr_mode & CHMODE_LOW_LATENCY) {
		SK_ERR("low latency is not supported for upipe");
		err = ENOTSUP;
		goto done;
	}

	if (port == NEXUS_PORT_USER_PIPE_SERVER) {
		chr->cr_real_endpoint = CH_ENDPOINT_USER_PIPE_MASTER;
	} else if (port == NEXUS_PORT_USER_PIPE_CLIENT) {
		chr->cr_real_endpoint = CH_ENDPOINT_USER_PIPE_SLAVE;
	} else {
		err = EINVAL;
		goto done;
	}

	chr->cr_endpoint = chr->cr_real_endpoint;
	chr->cr_ring_set = RING_SET_DEFAULT;
	chr->cr_pipe_id = 0;
	(void) snprintf(chr->cr_name, sizeof(chr->cr_name), "upipe:%llu:%.*s",
	    nx->nx_id, (int)nx->nx_prov->nxprov_params->nxp_namelen,
	    nx->nx_prov->nxprov_params->nxp_name);

	err = na_connect(nx, ch, chr, ch0, nxb, p);
done:
	return err;
}
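
/*
 * For instance (illustrative values only), a nexus with nx_id 7 whose
 * provider was registered under the name "com.example.pipe" would yield
 * the channel name "upipe:7:com.example.pipe" from the snprintf() above.
 */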

static void
nx_upipe_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nxdom_prov)
	SK_LOCK_ASSERT_HELD();

	SK_D("channel 0x%llx -!- nexus 0x%llx (%s:\"%s\":%u:%d)", SK_KVA(ch),
	    SK_KVA(nx), nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
	    ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);

	na_disconnect(nx, ch);
	/*
	 * Set NXF_REJECT on the nexus which would cause any channel on the
	 * peer adapter to cease to function.
	 */
	if (NX_PROV(nx)->nxprov_params->nxp_reject_on_close) {
		atomic_bitset_32(&nx->nx_flags, NXF_REJECT);
	}
}

static void
nx_upipe_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct proc *p)
{
#pragma unused(nxdom_prov, nx)
	struct nexus_adapter *na = ch->ch_na;
	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
	ring_id_t qfirst = ch->ch_first[NR_TX];
	ring_id_t qlast = ch->ch_last[NR_TX];
	uint32_t i;

	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
	ASSERT(na->na_type == NA_USER_PIPE);

	/*
	 * Inform the peer receiver thread in nx_upipe_na_rxsync() or the
	 * peer transmit thread in nx_upipe_na_txsync() about
	 * this endpoint going defunct. We utilize the TX ring's
	 * lock for serialization, since that is what's being used
	 * by the receiving endpoint.
	 */
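	/*
	 * Note on the role-based ordering below (editorial gloss, inferred
	 * from the cross-linking in nx_upipe_na_rings_create()): an RX
	 * kring's ckr_pipe points at the peer's TX kring, so both branches
	 * acquire the same pair {master TX kring, slave TX kring} -- the
	 * master via its own TX kring first, the slave via its peer's
	 * (i.e. the master's) TX kring first. Either way the master's lock
	 * is taken before the slave's, giving one global order and avoiding
	 * deadlock if both channels go defunct concurrently.
	 */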
	for (i = qfirst; i < qlast; i++) {
		/*
		 * For maintaining lock ordering between the two channels of
		 * user pipe.
		 */
		if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
			(void) kr_enter(&NAKR(na, NR_TX)[i], TRUE);
			(void) kr_enter(NAKR(na, NR_RX)[i].ckr_pipe, TRUE);
		} else {
			(void) kr_enter(NAKR(na, NR_RX)[i].ckr_pipe, TRUE);
			(void) kr_enter(&NAKR(na, NR_TX)[i], TRUE);
		}
	}

	na_ch_rings_defunct(ch, p);

	for (i = qfirst; i < qlast; i++) {
		if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
			(void) kr_exit(NAKR(na, NR_RX)[i].ckr_pipe);
			(void) kr_exit(&NAKR(na, NR_TX)[i]);
		} else {
			(void) kr_exit(&NAKR(na, NR_TX)[i]);
			(void) kr_exit(NAKR(na, NR_RX)[i].ckr_pipe);
		}
	}
}

static void
nx_upipe_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked)
{
#pragma unused(nxdom_prov)
	struct nexus_upipe_adapter *pna =
	    (struct nexus_upipe_adapter *)ch->ch_na;

	if (!locked) {
		SK_LOCK_ASSERT_NOTHELD();
		SK_LOCK();
		LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
	} else {
		SK_LOCK_ASSERT_HELD();
		LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
	}

	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
	ASSERT(ch->ch_na->na_type == NA_USER_PIPE);

	/*
	 * At this point, we know that the arena shared by the master and
	 * slave adapters has no more valid mappings on the channels opened
	 * to them. We need to invoke na_defunct() on both adapters to
	 * release any remaining slots attached to their rings.
	 *
	 * Note that the 'ch' that we pass in here is irrelevant as we
	 * don't support user packet pool for user pipe.
	 */
	na_defunct(nx, ch, &pna->pna_up, locked);
	if (pna->pna_peer != NULL) {
		na_defunct(nx, ch, &pna->pna_peer->pna_up, locked);
	}

	/*
	 * And if their parent adapter (the memory owner) is a pseudo
	 * nexus adapter that we initially created in nx_upipe_na_find(),
	 * invoke na_defunct() on it now to do the final teardown on
	 * the arena.
	 */
	if (pna->pna_parent->na_type == NA_PSEUDO) {
		na_defunct(nx, ch, pna->pna_parent, locked);
	}

	SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d)",
	    ch->ch_name, ch->ch_pid, SK_KVA(ch), SK_KVA(nx),
	    nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
	    ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);

	if (!locked) {
		LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
		SK_UNLOCK();
	} else {
		LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
		SK_LOCK_ASSERT_HELD();
	}
}

/* allocate the pipe array in the parent adapter */
static int
nx_upipe_na_alloc(struct nexus_adapter *na, uint32_t npipes)
{
	struct nexus_upipe_adapter **npa;
	size_t len, orig_len;

	if (npipes <= na->na_max_pipes) {
		/* we already have more entries than requested */
		return 0;
	}
	if (npipes < na->na_next_pipe || npipes > NX_UPIPE_MAXPIPES) {
		return EINVAL;
	}

	orig_len = sizeof(struct nexus_upipe_adapter *) * na->na_max_pipes;
	len = sizeof(struct nexus_upipe_adapter *) * npipes;
	npa = sk_realloc(na->na_pipes, orig_len, len, Z_WAITOK, skmem_tag_pipes);
	if (npa == NULL) {
		return ENOMEM;
	}

	na->na_pipes = npa;
	na->na_max_pipes = npipes;

	return 0;
}

/* deallocate the parent array in the parent adapter */
void
nx_upipe_na_dealloc(struct nexus_adapter *na)
{
	if (na->na_pipes) {
		if (na->na_next_pipe > 0) {
			SK_ERR("freeing non-empty pipe array for %s "
			    "(%u dangling pipes)!", na->na_name,
			    na->na_next_pipe);
		}
		sk_free(na->na_pipes,
		    sizeof(struct nexus_upipe_adapter *) * na->na_max_pipes);
		na->na_pipes = NULL;
		na->na_max_pipes = 0;
		na->na_next_pipe = 0;
	}
}

/* find a pipe endpoint with the given id among the parent's pipes */
static struct nexus_upipe_adapter *
nx_upipe_find(struct nexus_adapter *parent, uint32_t pipe_id)
{
	uint32_t i;
	struct nexus_upipe_adapter *na;

	for (i = 0; i < parent->na_next_pipe; i++) {
		na = parent->na_pipes[i];
		if (na->pna_id == pipe_id) {
			return na;
		}
	}
	return NULL;
}

/* add a new pipe endpoint to the parent array */
static int
nx_upipe_na_add(struct nexus_adapter *parent, struct nexus_upipe_adapter *na)
{
	if (parent->na_next_pipe >= parent->na_max_pipes) {
		uint32_t npipes = parent->na_max_pipes ?
		    2 * parent->na_max_pipes : 2;
		int error = nx_upipe_na_alloc(parent, npipes);
		if (error) {
			return error;
		}
	}

	parent->na_pipes[parent->na_next_pipe] = na;
	na->pna_parent_slot = parent->na_next_pipe;
	parent->na_next_pipe++;
	return 0;
}

/* remove the given pipe endpoint from the parent array */
static void
nx_upipe_na_remove(struct nexus_adapter *parent, struct nexus_upipe_adapter *na)
{
	uint32_t n;
	n = --parent->na_next_pipe;
	if (n != na->pna_parent_slot) {
		struct nexus_upipe_adapter **p =
		    &parent->na_pipes[na->pna_parent_slot];
		*p = parent->na_pipes[n];
		(*p)->pna_parent_slot = na->pna_parent_slot;
	}
	parent->na_pipes[n] = NULL;
}
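
/*
 * Illustration of the remove above: with na_pipes = [A, B, C] and
 * na_next_pipe = 3, removing A moves the last entry into its slot
 * (yielding [C, B]), updates C's pna_parent_slot to 0, and NULLs the
 * vacated tail entry -- an O(1) unordered remove that keeps the array
 * dense, at the cost of not preserving insertion order.
 */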

static int
nx_upipe_na_txsync(struct __kern_channel_ring *txkring, struct proc *p,
    uint32_t flags)
{
	struct __kern_channel_ring *rxkring = txkring->ckr_pipe;
	volatile uint64_t *tx_tsync, *tx_tnote, *rx_tsync;
	int sent = 0, ret = 0;

	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u "
	    "flags 0x%x -> kr \"%s\" (0x%llx) krflags 0x%b ring %u",
	    sk_proc_name_address(p), sk_proc_pid(p), txkring->ckr_name,
	    SK_KVA(txkring), txkring->ckr_flags, CKRF_BITS,
	    txkring->ckr_ring_id, flags, rxkring->ckr_name, SK_KVA(rxkring),
	    rxkring->ckr_flags, CKRF_BITS, rxkring->ckr_ring_id);

	/*
	 * Serialize write access to the transmit ring, since another
	 * thread coming down for rxsync might pick up pending slots.
	 */
	ASSERT(txkring->ckr_owner == current_thread());

	/*
	 * Record the time of sync and grab sync time of other side;
	 * use atomic store and load since we're not holding the
	 * lock used by the receive ring. This allows us to avoid
	 * the potentially costly membar_sync().
	 */
	/* deconst */
	tx_tsync = __DECONST(uint64_t *, &txkring->ckr_ring->ring_sync_time);
	atomic_set_64(tx_tsync, txkring->ckr_sync_time);

	/*
	 * Read from the peer's kring, not its user ring; the peer's channel
	 * may be defunct, in which case it's unsafe to access its user ring.
	 */
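	/*
	 * (atomic_add_64_ov() with a zero delta below doubles as an atomic
	 * 64-bit load of the peer's sync time.)
	 */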
	rx_tsync = __DECONST(uint64_t *, &rxkring->ckr_sync_time);
	tx_tnote = __DECONST(uint64_t *, &txkring->ckr_ring->ring_notify_time);
	*tx_tnote = atomic_add_64_ov(rx_tsync, 0);

	if (__probable(txkring->ckr_rhead != txkring->ckr_khead)) {
		sent = nx_upipe_na_txsync_locked(txkring, p, flags,
		    &ret, FALSE);
	}

	if (sent != 0) {
		(void) rxkring->ckr_na_notify(rxkring, p, 0);
	}

	return ret;
}

int
nx_upipe_na_txsync_locked(struct __kern_channel_ring *txkring, struct proc *p,
    uint32_t flags, int *ret, boolean_t rx)
{
#pragma unused(p, flags, rx)
	struct __kern_channel_ring *rxkring = txkring->ckr_pipe;
	const slot_idx_t lim_tx = txkring->ckr_lim;
	const slot_idx_t lim_rx = rxkring->ckr_lim;
	slot_idx_t j, k;
	int n, m, b, sent = 0;
	uint32_t byte_count = 0;
	int limit;	/* max # of slots to transfer */

	*ret = 0;

	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\", kh %3u kt %3u | "
	    "rh %3u rt %3u [pre%s]", sk_proc_name_address(p),
	    sk_proc_pid(p), txkring->ckr_name, txkring->ckr_khead,
	    txkring->ckr_ktail, txkring->ckr_rhead,
	    txkring->ckr_rtail, rx ? "*" : "");
	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\", kh %3u kt %3u | "
	    "rh %3u rt %3u [pre%s]", sk_proc_name_address(p),
	    sk_proc_pid(p), rxkring->ckr_name, rxkring->ckr_khead,
	    rxkring->ckr_ktail, rxkring->ckr_rhead,
	    rxkring->ckr_rtail, rx ? "*" : "");

	if (__improbable(KR_DROP(txkring) || KR_DROP(rxkring))) {
		*ret = ENXIO;
		goto done;
	}

	j = rxkring->ckr_ktail;	/* RX */
	k = txkring->ckr_khead;	/* TX */

	/* # of new tx slots */
	n = txkring->ckr_rhead - txkring->ckr_khead;
	if (n < 0) {
		n += txkring->ckr_num_slots;
	}
	limit = n;

	/* # of rx busy (unclaimed) slots */
	b = j - rxkring->ckr_khead;
	if (b < 0) {
		b += rxkring->ckr_num_slots;
	}

	/* # of rx avail free slots (subtract busy from max) */
	m = lim_rx - b;
	if (m < limit) {
		limit = m;
	}
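	/*
	 * Worked example (illustrative numbers, assuming the usual
	 * convention that ckr_lim == ckr_num_slots - 1): with 128 TX
	 * slots, ckr_khead 120 and ckr_rhead 5, n = 5 - 120 = -115,
	 * which wraps to 13 new slots. If the RX ring already holds
	 * b = 100 busy slots, m = 127 - 100 = 27, so limit stays 13;
	 * had b been 120, limit would drop to m = 7.
	 */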

	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\" -> new %u, kr \"%s\" "
	    "-> free %u", sk_proc_name_address(p), sk_proc_pid(p),
	    txkring->ckr_name, n, rxkring->ckr_name, m);

	/* rxring is full, or nothing to send? */
	if (__improbable((sent = limit) == 0)) {
		SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
		    "%s(%d) kr \"%s\" -> %s%s",
		    sk_proc_name_address(p), sk_proc_pid(p), (n > m) ?
		    rxkring->ckr_name : txkring->ckr_name, ((n > m) ?
		    "no room avail" : "no new slots"),
		    (rx ? " (lost race, ok)" : ""));
		goto done;
	}

	ASSERT(limit > 0);
	while (limit--) {
		struct __kern_slot_desc *ksd_tx = KR_KSD(txkring, k);
		struct __user_slot_desc *usd_tx = KR_USD(txkring, k);
		struct __kern_slot_desc *ksd_rx = KR_KSD(rxkring, j);
		struct __user_slot_desc *usd_rx = KR_USD(rxkring, j);
		struct __kern_quantum *kqum;

		kqum = ksd_tx->sd_qum;
		/*
		 * Packets failing internalization should be dropped in
		 * TX sync prologue.
		 */
		ASSERT((kqum->qum_qflags & (QUM_F_INTERNALIZED |
		    QUM_F_FINALIZED)) == (QUM_F_INTERNALIZED |
		    QUM_F_FINALIZED));

		byte_count += kqum->qum_len;

		/*
		 * Swap the slots.
		 *
		 * XXX: [email protected] -- this bypasses the slot attach/detach
		 * interface, and needs to be changed when upipe adopts the
		 * packet APIs. SD_SWAP() will perform a block copy of the
		 * swap, and will readjust the kernel slot descriptor's sd_user
		 * accordingly.
		 */
		SD_SWAP(ksd_rx, usd_rx, ksd_tx, usd_tx);

		j = SLOT_NEXT(j, lim_rx);
		k = SLOT_NEXT(k, lim_tx);
	}

	kr_update_stats(rxkring, sent, byte_count);
	if (__improbable(kr_stat_enable != 0)) {
		txkring->ckr_stats = rxkring->ckr_stats;
	}

	/*
	 * Make sure the slots are updated before ckr_ktail reaches global
	 * visibility, since we are not holding rx ring's kr_enter().
	 */
	membar_sync();

	rxkring->ckr_ktail = j;
	txkring->ckr_khead = k;
	txkring->ckr_ktail = SLOT_PREV(k, lim_tx);

done:
	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\", kh %3u kt %3u | "
	    "rh %3u rt %3u [post%s]", sk_proc_name_address(p),
	    sk_proc_pid(p), txkring->ckr_name, txkring->ckr_khead,
	    txkring->ckr_ktail, txkring->ckr_rhead,
	    txkring->ckr_rtail, rx ? "*" : "");
	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\", kh %3u kt %3u | "
	    "rh %3u rt %3u [post%s]", sk_proc_name_address(p),
	    sk_proc_pid(p), rxkring->ckr_name, rxkring->ckr_khead,
	    rxkring->ckr_ktail, rxkring->ckr_rhead,
	    rxkring->ckr_rtail, rx ? "*" : "");

	return sent;
}

static int
nx_upipe_na_rxsync(struct __kern_channel_ring *rxkring, struct proc *p,
    uint32_t flags)
{
#pragma unused(p)
	struct __kern_channel_ring *txkring = rxkring->ckr_pipe;
	volatile uint64_t *rx_tsync, *rx_tnote, *tx_tsync;
	const slot_idx_t lim_rx = rxkring->ckr_lim;
	int n;	/* new slots from transmit side */
	int m, b, ret = 0;
	uint32_t r;

	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u "
	    "flags 0x%x <- kr \"%s\" (0x%llx) krflags 0x%b ring %u",
	    sk_proc_name_address(p), sk_proc_pid(p), rxkring->ckr_name,
	    SK_KVA(rxkring), rxkring->ckr_flags, CKRF_BITS,
	    rxkring->ckr_ring_id, flags, txkring->ckr_name, SK_KVA(txkring),
	    txkring->ckr_flags, CKRF_BITS, txkring->ckr_ring_id);

	ASSERT(rxkring->ckr_owner == current_thread());

	/* reclaim and get # of rx reclaimed slots */
	r = kr_reclaim(rxkring);

	/* # of rx busy (unclaimed) slots */
	b = rxkring->ckr_ktail - rxkring->ckr_khead;
	if (b < 0) {
		b += rxkring->ckr_num_slots;
	}

	/* # of rx avail free slots (subtract busy from max) */
	m = lim_rx - b;

	/*
	 * Check if there's any new slots on transmit ring; do this
	 * first without acquiring that ring's ckr_qlock, and use
	 * the memory barrier (paired with second one in txsync.)
	 * If we missed the race we'd just pay the cost of acquiring
	 * ckr_qlock and potentially returning from "internal txsync"
	 * without anything to process, which is okay.
	 */
	membar_sync();
	n = txkring->ckr_rhead - txkring->ckr_khead;
	if (n < 0) {
		n += txkring->ckr_num_slots;
	}

	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
	    "%s(%d) kr \"%s\" <- free %u, kr \"%s\" <- new %u",
	    sk_proc_name_address(p), sk_proc_pid(p),
	    rxkring->ckr_name, m, txkring->ckr_name, n);

	/*
	 * Record the time of sync and grab sync time of other side;
	 * use atomic store and load since we're not holding the
	 * lock used by the receive ring. This allows us to avoid
	 * the potentially costly membar_sync().
	 */
	/* deconst */
	rx_tsync = __DECONST(uint64_t *, &rxkring->ckr_ring->ring_sync_time);
	atomic_set_64(rx_tsync, rxkring->ckr_sync_time);

	/*
	 * Read from the peer's kring, not its user ring; the peer's channel
	 * may be defunct, in which case it's unsafe to access its user ring.
	 */
	tx_tsync = __DECONST(uint64_t *, &txkring->ckr_sync_time);
	rx_tnote = __DECONST(uint64_t *, &rxkring->ckr_ring->ring_notify_time);
	*rx_tnote = atomic_add_64_ov(tx_tsync, 0);

	/*
	 * If we have slots to pick up from the transmit side and we
	 * have space available, perform an equivalent of "internal txsync".
	 *
	 * Acquire write access to the transmit (peer) ring and serialize
	 * access to it, since another thread coming down for txsync might
	 * add new slots. If we fail to get the kring lock, then don't
	 * worry because there's already a transmit sync in progress to
	 * move packets.
	 */
	if (__probable(n != 0 && m != 0 && (flags & NA_SYNCF_MONITOR) == 0)) {
		(void) kr_enter(txkring, TRUE);
		n = nx_upipe_na_txsync_locked(txkring, p, flags, &ret, TRUE);
		kr_exit(txkring);
	} else {
		n = 0;
	}

	/*
	 * If we have reclaimed some slots or transferred new slots
	 * from the transmit side, notify the other end. Also notify
	 * ourselves to pick up newly transferred ones, if any.
	 */
	if (__probable(r != 0 || n != 0)) {
		SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
		    "%s(%d) kr \"%s\", kh %3u kt %3u | "
		    "rh %3u rt %3u [rel %u new %u]",
		    sk_proc_name_address(p), sk_proc_pid(p), rxkring->ckr_name,
		    rxkring->ckr_khead, rxkring->ckr_ktail,
		    rxkring->ckr_rhead, rxkring->ckr_rtail, r, n);

		(void) txkring->ckr_na_notify(txkring, p, 0);
	}

	return ret;
}

static int
nx_upipe_na_rings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
	struct nexus_adapter *ona = &pna->pna_peer->pna_up;
	int error = 0;
	enum txrx t;
	uint32_t i;

	/*
	 * Create krings and all the rings for this end;
	 * we'll update ckr_save_ring pointers below.
	 */
	error = na_rings_mem_setup(na, 0, FALSE, ch);
	if (error != 0) {
		goto err;
	}

	/* update our hidden ring pointers */
	for_rx_tx(t) {
		for (i = 0; i < na_get_nrings(na, t); i++) {
			NAKR(na, t)[i].ckr_save_ring =
			    NAKR(na, t)[i].ckr_ring;
		}
	}

	/* now, create krings and rings of the other end */
	error = na_rings_mem_setup(ona, 0, FALSE, ch);
	if (error != 0) {
		na_rings_mem_teardown(na, ch, FALSE);	/* this end */
		goto err;
	}

	for_rx_tx(t) {
		for (i = 0; i < na_get_nrings(ona, t); i++) {
			NAKR(ona, t)[i].ckr_save_ring =
			    NAKR(ona, t)[i].ckr_ring;
		}
	}

	/* cross link the krings */
	for_rx_tx(t) {
		/* swap NR_TX <-> NR_RX (skip host ring) */
		enum txrx r = sk_txrx_swap(t);
		for (i = 0; i < na_get_nrings(na, t); i++) {
			NAKR(na, t)[i].ckr_pipe =
			    NAKR(&pna->pna_peer->pna_up, r) + i;
			NAKR(&pna->pna_peer->pna_up, r)[i].ckr_pipe =
			    NAKR(na, t) + i;
		}
	}
err:
	return error;
}
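
/*
 * The cross-linking above yields the following topology per ring index i
 * (e1/e2 being the two endpoints, as in the comments below):
 *
 *	e1 TX[i].ckr_pipe --> e2 RX[i]    e1 RX[i].ckr_pipe --> e2 TX[i]
 *	e2 TX[i].ckr_pipe --> e1 RX[i]    e2 RX[i].ckr_pipe --> e1 TX[i]
 *
 * so a txsync on either endpoint can deposit slots directly into the
 * peer's RX kring without any intermediate queue.
 */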

/*
 * Pipe endpoints are created and destroyed together, so that endpoints do not
 * have to check for the existence of their peer at each ?xsync.
 *
 * To play well with the existing nexus adapter infrastructure (refcounts etc.),
 * we adopt the following strategy:
 *
 * 1) The first endpoint that is created also creates the other endpoint and
 * grabs a reference to it.
 *
 *    state A)  user1 --> endpoint1 --> endpoint2
 *
 * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
 * its reference to the user:
 *
 *    state B)  user1 --> endpoint1     endpoint2 <--- user2
 *
 * 3) Assume that, starting from state B, endpoint2 is closed. In the unregister
 * callback endpoint2 notes that endpoint1 is still active and adds a reference
 * from endpoint1 to itself. When user2 then releases her own reference,
 * endpoint2 is not destroyed and we are back to state A. A symmetrical state
 * would be reached if endpoint1 were released instead.
 *
 * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
 * it owns a reference to endpoint2 and releases it.
 *
 * Something similar goes on for the creation and destruction of the krings.
 */
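
/*
 * Concrete walkthrough of the states above: opening the server side first
 * creates both adapters, with e1 holding the only reference to e2
 * (state A). When the client then binds e2, nx_upipe_na_activate() drops
 * that internal reference because user2 now holds its own (state B). If
 * user2 later closes, the deactivate path re-adds the internal reference
 * before user2's own reference is released, returning to state A rather
 * than destroying e2.
 */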


/*
 * nx_upipe_na_krings_create.
 *
 * There are two cases:
 *
 * 1) state is
 *
 *        usr1 --> e1 --> e2
 *
 *    and we are e1. We have to create both sets
 *    of krings.
 *
 * 2) state is
 *
 *        usr1 --> e1 --> e2
 *
 *    and we are e2. e1 is certainly registered and our
 *    krings already exist, but they may be hidden.
 */
static int
nx_upipe_na_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
	int error = 0;
	enum txrx t;
	uint32_t i;

	/*
	 * Verify symmetrical ring counts; validated
	 * at nexus provider registration time.
	 */
	ASSERT(na_get_nrings(na, NR_TX) == na_get_nrings(na, NR_RX));

	if (pna->pna_peer_ref) {
		/* case 1) above */
		SK_DF(SK_VERB_USER_PIPE,
		    "0x%llx: case 1, create everything", SK_KVA(na));
		error = nx_upipe_na_rings_create(na, ch);
	} else {
		/* case 2) above */
		/* recover the hidden rings */
		SK_DF(SK_VERB_USER_PIPE,
		    "0x%llx: case 2, hidden rings", SK_KVA(na));
		for_rx_tx(t) {
			for (i = 0; i < na_get_nrings(na, t); i++) {
				NAKR(na, t)[i].ckr_ring =
				    NAKR(na, t)[i].ckr_save_ring;
			}
		}
	}

	ASSERT(error == 0 || (na->na_tx_rings == NULL &&
	    na->na_rx_rings == NULL && na->na_slot_ctxs == NULL));
	ASSERT(error == 0 || (pna->pna_peer->pna_up.na_tx_rings == NULL &&
	    pna->pna_peer->pna_up.na_rx_rings == NULL &&
	    pna->pna_peer->pna_up.na_slot_ctxs == NULL));

	return error;
}

/*
 * nx_upipe_na_activate.
 *
 * There are two cases on registration (onoff==1)
 *
 * 1.a) state is
 *
 *        usr1 --> e1 --> e2
 *
 *      and we are e1. Nothing special to do.
 *
 * 1.b) state is
 *
 *        usr1 --> e1 --> e2 <-- usr2
 *
 *      and we are e2. Drop the ref e1 is holding.
 *
 * There are two additional cases on unregister (onoff==0)
 *
 * 2.a) state is
 *
 *        usr1 --> e1 --> e2
 *
 *      and we are e1. Nothing special to do, e2 will
 *      be cleaned up by the destructor of e1.
 *
 * 2.b) state is
 *
 *        usr1 --> e1     e2 <-- usr2
 *
 *      and we are either e1 or e2. Add a ref from the
 *      other end and hide our rings.
 */
static int
nx_upipe_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
{
	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;

	SK_LOCK_ASSERT_HELD();

	SK_DF(SK_VERB_USER_PIPE, "na \"%s\" (0x%llx) %s", na->na_name,
	    SK_KVA(na), na_activate_mode2str(mode));

	switch (mode) {
	case NA_ACTIVATE_MODE_ON:
		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
		break;

	case NA_ACTIVATE_MODE_DEFUNCT:
		break;

	case NA_ACTIVATE_MODE_OFF:
		atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (pna->pna_peer_ref) {
		SK_DF(SK_VERB_USER_PIPE,
		    "0x%llx: case 1.a or 2.a, nothing to do", SK_KVA(na));
		return 0;
	}

	switch (mode) {
	case NA_ACTIVATE_MODE_ON:
		SK_DF(SK_VERB_USER_PIPE,
		    "0x%llx: case 1.b, drop peer", SK_KVA(na));
		if (pna->pna_peer->pna_peer_ref) {
			pna->pna_peer->pna_peer_ref = FALSE;
			(void) na_release_locked(na);
		}
		break;

	case NA_ACTIVATE_MODE_OFF:
		SK_DF(SK_VERB_USER_PIPE,
		    "0x%llx: case 2.b, grab peer", SK_KVA(na));
		if (!pna->pna_peer->pna_peer_ref) {
			na_retain_locked(na);
			pna->pna_peer->pna_peer_ref = TRUE;
		}
		break;

	default:
		break;
	}

	return 0;
}

/*
 * nx_upipe_na_krings_delete.
 *
 * There are two cases:
 *
 * 1) state is
 *
 *        usr1 --> e1 --> e2
 *
 *    and we are e1 (e2 is not bound, so krings_delete cannot be
 *    called on it);
 *
 * 2) state is
 *
 *        usr1 --> e1     e2 <-- usr2
 *
 *    and we are either e1 or e2.
 *
 * In the former case we have to also delete the krings of e2;
 * in the latter case we do nothing (note that our krings
 * have already been hidden in the unregister callback).
 */
static void
nx_upipe_na_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
	struct nexus_adapter *ona;	/* na of the other end */
	uint32_t i;
	enum txrx t;

	SK_LOCK_ASSERT_HELD();

	if (!pna->pna_peer_ref) {
		SK_DF(SK_VERB_USER_PIPE,
		    "0x%llx: case 2, kept alive by peer", SK_KVA(na));
		/*
		 * If adapter is defunct (note the explicit test against
		 * NAF_DEFUNCT, and not the "defunct" parameter passed in
		 * by the caller), then the peer's channel has gone defunct.
		 * We get here because this channel was not defuncted, and
		 * that this is the last active reference to the adapter.
		 * At this point we tear everything down, since the caller
		 * will proceed to destroying the memory regions.
		 */
		if (na->na_flags & NAF_DEFUNCT) {
			na_rings_mem_teardown(na, ch, defunct);
		}
		return;
	}

	/* case 1) above */
	SK_DF(SK_VERB_USER_PIPE,
	    "0x%llx: case 1, deleting everything", SK_KVA(na));

	ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));

	/* restore the ring to be deleted on the peer */
	ona = &pna->pna_peer->pna_up;
	if (ona->na_tx_rings == NULL) {
		/*
		 * Already deleted, we must be on an
		 * cleanup-after-error path.
		 * Just delete this end.
		 */
		na_rings_mem_teardown(na, ch, defunct);
		return;
	}

	/* delete the memory rings */
	na_rings_mem_teardown(na, ch, defunct);

	if (!defunct) {
		for_rx_tx(t) {
			for (i = 0; i < na_get_nrings(ona, t); i++) {
				NAKR(ona, t)[i].ckr_ring =
				    NAKR(ona, t)[i].ckr_save_ring;
			}
		}
	}

	/* delete the memory rings of the peer */
	na_rings_mem_teardown(ona, ch, defunct);
}

static void
nx_upipe_na_dtor(struct nexus_adapter *na)
{
	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
	struct nx_upipe *u = NX_UPIPE_PRIVATE(na->na_nx);

	SK_LOCK_ASSERT_HELD();

	SK_DF(SK_VERB_USER_PIPE, "0x%llx", SK_KVA(na));
	if (pna->pna_peer_ref) {
		SK_DF(SK_VERB_USER_PIPE,
		    "0x%llx: clean up peer 0x%llx", SK_KVA(na),
		    SK_KVA(&pna->pna_peer->pna_up));
		pna->pna_peer_ref = FALSE;
		(void) na_release_locked(&pna->pna_peer->pna_up);
	}
	if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
		nx_upipe_na_remove(pna->pna_parent, pna);
	}
	(void) na_release_locked(pna->pna_parent);
	pna->pna_parent = NULL;

	/* release reference to parent adapter held by nx_upipe_na_find() */
	ASSERT(u->nup_pna_users != 0);
	if (--u->nup_pna_users == 0) {
		ASSERT(u->nup_pna != NULL);
		SK_DF(SK_VERB_USER_PIPE, "release parent: \"%s\" (0x%llx)",
		    u->nup_pna->na_name, SK_KVA(u->nup_pna));
		na_release_locked(u->nup_pna);
		u->nup_pna = NULL;
	}
}

int
nx_upipe_na_find(struct kern_nexus *nx, struct kern_channel *ch,
    struct chreq *chr, struct nxbind *nxb, struct proc *p,
    struct nexus_adapter **na, boolean_t create)
{
#pragma unused(ch, p)
	struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
	struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
	struct nexus_adapter *pna = NULL;	/* parent adapter */
	boolean_t anon = NX_ANONYMOUS_PROV(nx);
	struct nexus_upipe_adapter *mna, *sna, *req;
	ch_endpoint_t ep = chr->cr_endpoint;
	uint32_t pipe_id;
	int error;

	SK_LOCK_ASSERT_HELD();
	*na = NULL;

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u "
	    "ring_id %d ring_set %u ep_type %u:%u create %u%s",
	    chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr),
	    (int)chr->cr_port, chr->cr_mode, CHMODE_BITS,
	    chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set,
	    chr->cr_real_endpoint, chr->cr_endpoint, create,
	    (ep != CH_ENDPOINT_USER_PIPE_MASTER &&
	    ep != CH_ENDPOINT_USER_PIPE_SLAVE) ? " (skipped)" : "");
#endif /* SK_LOG */

	if (ep != CH_ENDPOINT_USER_PIPE_MASTER &&
	    ep != CH_ENDPOINT_USER_PIPE_SLAVE) {
		return 0;
	}

	/*
	 * Check client credentials.
	 */
	if (chr->cr_port == NEXUS_PORT_USER_PIPE_SERVER) {
		if (!anon && (u->nup_srv_nxb == NULL || nxb == NULL ||
		    !nxb_is_equal(u->nup_srv_nxb, nxb))) {
			return EACCES;
		}
	} else {
		ASSERT(chr->cr_port == NEXUS_PORT_USER_PIPE_CLIENT);
		if (!anon && (u->nup_cli_nxb == NULL || nxb == NULL ||
		    !nxb_is_equal(u->nup_cli_nxb, nxb))) {
			return EACCES;
		}
	}

	/*
	 * First, try to find a previously-created parent adapter
	 * for this nexus; else, create one and store it in the
	 * nexus. We'll release this at nexus destructor time.
	 */
	if ((pna = u->nup_pna) != NULL) {
		na_retain_locked(pna);	/* for us */
		SK_DF(SK_VERB_USER_PIPE, "found parent: \"%s\" (0x%llx)",
		    pna->na_name, SK_KVA(pna));
	} else {
		/* callee will hold a reference for us upon success */
		error = na_pseudo_create(nx, chr, &pna);
		if (error != 0) {
			SK_ERR("parent create failed: %d", error);
			return error;
		}
		/* hold an extra reference for nx_upipe */
		u->nup_pna = pna;
		na_retain_locked(pna);
		SK_DF(SK_VERB_USER_PIPE, "created parent: \"%s\" (0x%llx)",
		    pna->na_name, SK_KVA(pna));
	}

	/* next, lookup the pipe id in the parent list */
	req = NULL;
	pipe_id = chr->cr_pipe_id;
	mna = nx_upipe_find(pna, pipe_id);
	if (mna != NULL) {
		if (mna->pna_role == ep) {
			SK_DF(SK_VERB_USER_PIPE,
			    "found pipe_id %u directly at slot %u",
			    pipe_id, mna->pna_parent_slot);
			req = mna;
		} else {
			SK_DF(SK_VERB_USER_PIPE,
			    "found pipe_id %u indirectly at slot %u",
			    pipe_id, mna->pna_parent_slot);
			req = mna->pna_peer;
		}
		/*
		 * The pipe we have found already holds a ref to the parent,
		 * so we need to drop the one we got from above.
		 */
		(void) na_release_locked(pna);
		goto found;
	}
	SK_DF(SK_VERB_USER_PIPE,
	    "pipe_id %u not found, create %u", pipe_id, create);
	if (!create) {
		error = ENODEV;
		goto put_out;
	}
	/*
	 * We create both master and slave.
	 * The endpoint we were asked for holds a reference to
	 * the other one.
	 */
	mna = na_upipe_alloc(Z_WAITOK);

	ASSERT(mna->pna_up.na_type == NA_USER_PIPE);
	ASSERT(mna->pna_up.na_free == na_upipe_free);

	(void) snprintf(mna->pna_up.na_name, sizeof(mna->pna_up.na_name),
	    "%s{%u", pna->na_name, pipe_id);
	uuid_generate_random(mna->pna_up.na_uuid);

	mna->pna_id = pipe_id;
	mna->pna_role = CH_ENDPOINT_USER_PIPE_MASTER;
	mna->pna_parent = pna;
	mna->pna_up.na_txsync = nx_upipe_na_txsync;
	mna->pna_up.na_rxsync = nx_upipe_na_rxsync;
	mna->pna_up.na_activate = nx_upipe_na_activate;
	mna->pna_up.na_dtor = nx_upipe_na_dtor;
	mna->pna_up.na_krings_create = nx_upipe_na_krings_create;
	mna->pna_up.na_krings_delete = nx_upipe_na_krings_delete;
	mna->pna_up.na_arena = pna->na_arena;
	skmem_arena_retain((&mna->pna_up)->na_arena);
	atomic_bitset_32(&mna->pna_up.na_flags, NAF_MEM_LOANED);
	*(nexus_meta_type_t *)(uintptr_t)&mna->pna_up.na_md_type =
	    pna->na_md_type;
	*(nexus_meta_subtype_t *)(uintptr_t)&mna->pna_up.na_md_subtype =
	    pna->na_md_subtype;

	*(nexus_stats_type_t *)(uintptr_t)&mna->pna_up.na_stats_type =
	    NEXUS_STATS_TYPE_INVALID;
	*(uint32_t *)(uintptr_t)&mna->pna_up.na_flowadv_max =
	    nxp->nxp_flowadv_max;
	ASSERT(mna->pna_up.na_flowadv_max == 0 ||
	    skmem_arena_nexus(mna->pna_up.na_arena)->arn_flowadv_obj != NULL);

	/*
	 * Parent adapter parameters must match the nexus provider's by the
	 * time we get here, since na_find() above shouldn't return
	 * one otherwise.
	 */
	na_set_nrings(&mna->pna_up, NR_TX, nxp->nxp_tx_rings);
	na_set_nrings(&mna->pna_up, NR_RX, nxp->nxp_rx_rings);
	na_set_nslots(&mna->pna_up, NR_TX, nxp->nxp_tx_slots);
	na_set_nslots(&mna->pna_up, NR_RX, nxp->nxp_rx_slots);
	ASSERT(na_get_nrings(&mna->pna_up, NR_TX) == na_get_nrings(pna, NR_TX));
	ASSERT(na_get_nrings(&mna->pna_up, NR_RX) == na_get_nrings(pna, NR_RX));
	ASSERT(na_get_nslots(&mna->pna_up, NR_TX) == na_get_nslots(pna, NR_TX));
	ASSERT(na_get_nslots(&mna->pna_up, NR_RX) == na_get_nslots(pna, NR_RX));

	na_attach_common(&mna->pna_up, nx, &nx_upipe_prov_s);

	/* register the master with the parent */
	error = nx_upipe_na_add(pna, mna);
	if (error != 0) {
		goto free_mna;
	}

	/* create the slave */
	sna = na_upipe_alloc(Z_WAITOK);

	/* most fields are the same, copy from master and then fix */
	bcopy(mna, sna, sizeof(*sna));
	skmem_arena_retain((&sna->pna_up)->na_arena);
	atomic_bitset_32(&sna->pna_up.na_flags, NAF_MEM_LOANED);

	ASSERT(sna->pna_up.na_type == NA_USER_PIPE);
	ASSERT(sna->pna_up.na_free == na_upipe_free);

	(void) snprintf(sna->pna_up.na_name, sizeof(sna->pna_up.na_name),
	    "%s}%d", pna->na_name, pipe_id);
	uuid_generate_random(sna->pna_up.na_uuid);

	sna->pna_role = CH_ENDPOINT_USER_PIPE_SLAVE;
	na_attach_common(&sna->pna_up, nx, &nx_upipe_prov_s);

	/* join the two endpoints */
	mna->pna_peer = sna;
	sna->pna_peer = mna;

	/*
	 * We already have a reference to the parent, but we
	 * need another one for the other endpoint we created
	 */
	na_retain_locked(pna);

	if ((chr->cr_mode & CHMODE_DEFUNCT_OK) != 0) {
		atomic_bitset_32(&pna->na_flags, NAF_DEFUNCT_OK);
	}

	if (ep == CH_ENDPOINT_USER_PIPE_MASTER) {
		req = mna;
		mna->pna_peer_ref = TRUE;
		na_retain_locked(&sna->pna_up);
	} else {
		req = sna;
		sna->pna_peer_ref = TRUE;
		na_retain_locked(&mna->pna_up);
	}

	/* parent adapter now has two users (mna and sna) */
	u->nup_pna_users += 2;

#if SK_LOG
	SK_DF(SK_VERB_USER_PIPE, "created master 0x%llx and slave 0x%llx",
	    SK_KVA(mna), SK_KVA(sna));
	SK_DF(SK_VERB_USER_PIPE, "mna: \"%s\"", mna->pna_up.na_name);
	SK_DF(SK_VERB_USER_PIPE, " UUID: %s",
	    sk_uuid_unparse(mna->pna_up.na_uuid, uuidstr));
	SK_DF(SK_VERB_USER_PIPE, " nx: 0x%llx (\"%s\":\"%s\")",
	    SK_KVA(mna->pna_up.na_nx), NX_DOM(mna->pna_up.na_nx)->nxdom_name,
	    NX_DOM_PROV(mna->pna_up.na_nx)->nxdom_prov_name);
	SK_DF(SK_VERB_USER_PIPE, " flags: 0x%b",
	    mna->pna_up.na_flags, NAF_BITS);
	SK_DF(SK_VERB_USER_PIPE, " flowadv_max: %u",
	    mna->pna_up.na_flowadv_max);
	SK_DF(SK_VERB_USER_PIPE, " rings: tx %u rx %u",
	    na_get_nrings(&mna->pna_up, NR_TX),
	    na_get_nrings(&mna->pna_up, NR_RX));
	SK_DF(SK_VERB_USER_PIPE, " slots: tx %u rx %u",
	    na_get_nslots(&mna->pna_up, NR_TX),
	    na_get_nslots(&mna->pna_up, NR_RX));
	SK_DF(SK_VERB_USER_PIPE, " next_pipe: %u", mna->pna_up.na_next_pipe);
	SK_DF(SK_VERB_USER_PIPE, " max_pipes: %u", mna->pna_up.na_max_pipes);
	SK_DF(SK_VERB_USER_PIPE, " parent: \"%s\"",
	    mna->pna_parent->na_name);
	SK_DF(SK_VERB_USER_PIPE, " id: %u", mna->pna_id);
	SK_DF(SK_VERB_USER_PIPE, " role: %u", mna->pna_role);
	SK_DF(SK_VERB_USER_PIPE, " peer_ref: %u", mna->pna_peer_ref);
	SK_DF(SK_VERB_USER_PIPE, " parent_slot: %u", mna->pna_parent_slot);
	SK_DF(SK_VERB_USER_PIPE, "sna: \"%s\"", sna->pna_up.na_name);
	SK_DF(SK_VERB_USER_PIPE, " UUID: %s",
	    sk_uuid_unparse(sna->pna_up.na_uuid, uuidstr));
	SK_DF(SK_VERB_USER_PIPE, " nx: 0x%llx (\"%s\":\"%s\")",
	    SK_KVA(sna->pna_up.na_nx), NX_DOM(sna->pna_up.na_nx)->nxdom_name,
	    NX_DOM_PROV(sna->pna_up.na_nx)->nxdom_prov_name);
	SK_DF(SK_VERB_USER_PIPE, " flags: 0x%b",
	    sna->pna_up.na_flags, NAF_BITS);
	SK_DF(SK_VERB_USER_PIPE, " flowadv_max: %u",
	    sna->pna_up.na_flowadv_max);
	SK_DF(SK_VERB_USER_PIPE, " rings: tx %u rx %u",
	    na_get_nrings(&sna->pna_up, NR_TX),
	    na_get_nrings(&sna->pna_up, NR_RX));
	SK_DF(SK_VERB_USER_PIPE, " slots: tx %u rx %u",
	    na_get_nslots(&sna->pna_up, NR_TX),
	    na_get_nslots(&sna->pna_up, NR_RX));
	SK_DF(SK_VERB_USER_PIPE, " next_pipe: %u", sna->pna_up.na_next_pipe);
	SK_DF(SK_VERB_USER_PIPE, " max_pipes: %u", sna->pna_up.na_max_pipes);
	SK_DF(SK_VERB_USER_PIPE, " parent: \"%s\"",
	    sna->pna_parent->na_name);
	SK_DF(SK_VERB_USER_PIPE, " id: %u", sna->pna_id);
	SK_DF(SK_VERB_USER_PIPE, " role: %u", sna->pna_role);
	SK_DF(SK_VERB_USER_PIPE, " peer_ref: %u", sna->pna_peer_ref);
	SK_DF(SK_VERB_USER_PIPE, " parent_slot: %u", sna->pna_parent_slot);
#endif /* SK_LOG */

found:

	SK_DF(SK_VERB_USER_PIPE, "pipe_id %u role %s at 0x%llx", pipe_id,
	    (req->pna_role == CH_ENDPOINT_USER_PIPE_MASTER ?
	    "master" : "slave"), SK_KVA(req));
	if ((chr->cr_mode & CHMODE_DEFUNCT_OK) == 0) {
		atomic_bitclear_32(&pna->na_flags, NAF_DEFUNCT_OK);
	}
	*na = &req->pna_up;
	na_retain_locked(*na);

	/*
	 * Keep the reference to the parent; it will be released
	 * by the adapter's destructor.
	 */
	return 0;

free_mna:
	if (mna->pna_up.na_arena != NULL) {
		skmem_arena_release((&mna->pna_up)->na_arena);
		mna->pna_up.na_arena = NULL;
	}
	NA_FREE(&mna->pna_up);
put_out:
	(void) na_release_locked(pna);
	return error;
}

static struct nx_upipe *
nx_upipe_alloc(zalloc_flags_t how)
{
	struct nx_upipe *u;

	SK_LOCK_ASSERT_HELD();

	u = zalloc_flags(nx_upipe_zone, how | Z_ZERO);
	if (u) {
		SK_DF(SK_VERB_MEM, "upipe 0x%llx ALLOC", SK_KVA(u));
	}
	return u;
}

static void
nx_upipe_free(struct nx_upipe *u)
{
	ASSERT(u->nup_pna == NULL);
	ASSERT(u->nup_pna_users == 0);
	ASSERT(u->nup_cli_nxb == NULL);
	ASSERT(u->nup_srv_nxb == NULL);

	SK_DF(SK_VERB_MEM, "upipe 0x%llx FREE", SK_KVA(u));
	zfree(nx_upipe_zone, u);
}