1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 */
53
54 /*
55 * $FreeBSD$
56 *
57 * Monitors
58 *
59 * netmap monitors can be used to do monitoring of network traffic
60 * on another adapter, when the latter adapter is working in netmap mode.
61 *
62 * Monitors offer to userspace the same interface as any other netmap port,
63 * with as many pairs of netmap rings as the monitored adapter.
64 * However, only the rx rings are actually used. Each monitor rx ring receives
65 * the traffic transiting on both the tx and rx corresponding rings in the
66 * monitored adapter. During registration, the user can choose if she wants
67 * to intercept tx only, rx only, or both tx and rx traffic.
68 *
69 * If the monitor is not able to cope with the stream of frames, excess traffic
70 * will be dropped.
71 *
72 * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
73 *
74 * Monitors can be either zero-copy or copy-based.
75 *
76 * Copy monitors see the frames before they are consumed:
77 *
78 * - For tx traffic, this is when the application sends them, before they are
79 * passed down to the adapter.
80 *
81 * - For rx traffic, this is when they are received by the adapter, before
82 * they are sent up to the application, if any (note that, if no
83 * application is reading from a monitored ring, the ring will eventually
84 * fill up and traffic will stop).
85 *
86 * Zero-copy monitors only see the frames after they have been consumed:
87 *
88 * - For tx traffic, this is after the slots containing the frames have been
89 * marked as free. Note that this may happen at a considerably delay after
90 * frame transmission, since freeing of slots is often done lazily.
91 *
92 * - For rx traffic, this is after the consumer on the monitored adapter
93 * has released them. In most cases, the consumer is a userspace
94 * application which may have modified the frame contents.
95 *
96 * Several copy monitors may be active on any ring. Zero-copy monitors,
97 * instead, need exclusive access to each of the monitored rings. This may
98 * change in the future, if we implement zero-copy monitor chaining.
99 *
100 */
101
102 #include <skywalk/os_skywalk_private.h>
103 #include <skywalk/nexus/monitor/nx_monitor.h>
104
105 static int nx_mon_na_txsync(struct __kern_channel_ring *, struct proc *,
106 uint32_t);
107 static int nx_mon_na_rxsync(struct __kern_channel_ring *, struct proc *,
108 uint32_t);
109 static int nx_mon_na_krings_create(struct nexus_adapter *,
110 struct kern_channel *);
111 static void nx_mon_na_krings_delete(struct nexus_adapter *,
112 struct kern_channel *, boolean_t);
113 static uint32_t nx_mon_txrx2chmode(enum txrx);
114 static int nx_mon_kr_alloc(struct __kern_channel_ring *, uint32_t);
115 static void nx_mon_kr_dealloc(struct __kern_channel_ring *);
116 static int nx_mon_na_krings_locks(struct nexus_adapter *,
117 uint32_t[NR_TXRX], uint32_t[NR_TXRX]);
118 static void nx_mon_na_krings_unlock(struct nexus_adapter *,
119 const uint32_t[NR_TXRX], const uint32_t[NR_TXRX]);
120 static int nx_mon_enable(struct nexus_adapter *, int);
121 static void nx_mon_disable(struct nexus_adapter *);
122 static int nx_mon_add(struct __kern_channel_ring *,
123 struct __kern_channel_ring *, boolean_t);
124 static void nx_mon_del(struct __kern_channel_ring *,
125 struct __kern_channel_ring *, boolean_t);
126 static int nx_mon_na_activate_common(struct nexus_adapter *,
127 na_activate_mode_t, boolean_t);
128 static pkt_copy_from_pkt_t nx_mon_quantum_copy_64x;
129
130 static int nx_mon_zcopy_parent_sync(struct __kern_channel_ring *,
131 struct proc *, uint32_t, enum txrx);
132 static int nx_mon_zcopy_na_activate(struct nexus_adapter *, na_activate_mode_t);
133 static void nx_mon_zcopy_na_dtor(struct nexus_adapter *);
134
135 static void nx_mon_parent_sync(struct __kern_channel_ring *, struct proc *,
136 slot_idx_t, int);
137 static int nx_mon_na_activate(struct nexus_adapter *, na_activate_mode_t);
138 static void nx_mon_na_dtor(struct nexus_adapter *);
139
140 /*
141 * monitors work by replacing the nm_sync() and possibly the
142 * nm_notify() callbacks in the monitored rings.
143 */
144 static int nx_mon_zcopy_parent_txsync(struct __kern_channel_ring *,
145 struct proc *, uint32_t);
146 static int nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring *,
147 struct proc *, uint32_t);
148 static int nx_mon_parent_txsync(struct __kern_channel_ring *,
149 struct proc *, uint32_t);
150 static int nx_mon_parent_rxsync(struct __kern_channel_ring *,
151 struct proc *, uint32_t);
152 static int nx_mon_parent_notify(struct __kern_channel_ring *,
153 struct proc *, uint32_t);
154
155 static void nx_mon_dom_init(struct nxdom *);
156 static void nx_mon_dom_terminate(struct nxdom *);
157 static void nx_mon_dom_fini(struct nxdom *);
158 static int nx_mon_dom_bind_port(struct kern_nexus *, nexus_port_t *,
159 struct nxbind *, void *);
160 static int nx_mon_dom_unbind_port(struct kern_nexus *, nexus_port_t);
161 static int nx_mon_dom_connect(struct kern_nexus_domain_provider *,
162 struct kern_nexus *, struct kern_channel *, struct chreq *,
163 struct kern_channel *, struct nxbind *, struct proc *);
164 static void nx_mon_dom_disconnect(struct kern_nexus_domain_provider *,
165 struct kern_nexus *, struct kern_channel *);
166 static void nx_mon_dom_defunct(struct kern_nexus_domain_provider *,
167 struct kern_nexus *, struct kern_channel *, struct proc *);
168 static void nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider *,
169 struct kern_nexus *, struct kern_channel *, boolean_t);
170
171 static int nx_mon_prov_init(struct kern_nexus_domain_provider *);
172 static int nx_mon_prov_params_adjust(const struct kern_nexus_domain_provider *,
173 const struct nxprov_params *, struct nxprov_adjusted_params *);
174 static int nx_mon_prov_params(struct kern_nexus_domain_provider *,
175 const uint32_t, const struct nxprov_params *, struct nxprov_params *,
176 struct skmem_region_params[SKMEM_REGIONS]);
177 static int nx_mon_prov_mem_new(struct kern_nexus_domain_provider *,
178 struct kern_nexus *, struct nexus_adapter *);
179 static void nx_mon_prov_fini(struct kern_nexus_domain_provider *);
180
181 static struct nexus_monitor_adapter *na_mon_alloc(zalloc_flags_t);
182 static void na_mon_free(struct nexus_adapter *);
183
/*
 * Domain descriptor for the monitor nexus.  A monitor always rides on a
 * monitored adapter and borrows its rings, slots and memory allocator,
 * so most of the {def,min,max} tuples below are placeholder non-zeroes
 * (see comment inside the initializer).
 */
struct nxdom nx_monitor_dom_s = {
	.nxdom_prov_head =
	    STAILQ_HEAD_INITIALIZER(nx_monitor_dom_s.nxdom_prov_head),
	.nxdom_type = NEXUS_TYPE_MONITOR,
	.nxdom_md_type = NEXUS_META_TYPE_QUANTUM,
	.nxdom_md_subtype = NEXUS_META_SUBTYPE_PAYLOAD,
	.nxdom_name = "monitor",
	/*
	 * The following values don't really matter much, as a monitor
	 * isn't usable on its own; we just define them as non-zeroes.
	 */
	.nxdom_ports = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_tx_rings = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_rx_rings = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_tx_slots = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_rx_slots = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_buf_size = {
		.nb_def = 64,
		.nb_min = 64,
		.nb_max = 64,
	},
	.nxdom_meta_size = {
		.nb_def = NX_METADATA_OBJ_MIN_SZ,
		.nb_min = NX_METADATA_OBJ_MIN_SZ,
		.nb_max = NX_METADATA_USR_MAX_SZ,
	},
	.nxdom_stats_size = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_STATS_MAX_SZ,
	},
	/* monitors cannot themselves be piped */
	.nxdom_pipes = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = 0,
	},
	.nxdom_flowadv_max = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_FLOWADV_MAX,
	},
	.nxdom_nexusadv_size = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_NEXUSADV_MAX_SZ,
	},
	.nxdom_capabilities = {
		.nb_def = NXPCAP_USER_CHANNEL,
		.nb_min = NXPCAP_USER_CHANNEL,
		.nb_max = NXPCAP_USER_CHANNEL,
	},
	.nxdom_qmap = {
		.nb_def = NEXUS_QMAP_TYPE_INVALID,
		.nb_min = NEXUS_QMAP_TYPE_INVALID,
		.nb_max = NEXUS_QMAP_TYPE_INVALID,
	},
	.nxdom_max_frags = {
		.nb_def = NX_PBUF_FRAGS_DEFAULT,
		.nb_min = NX_PBUF_FRAGS_MIN,
		.nb_max = NX_PBUF_FRAGS_DEFAULT,
	},
	.nxdom_init = nx_mon_dom_init,
	.nxdom_terminate = nx_mon_dom_terminate,
	.nxdom_fini = nx_mon_dom_fini,
	.nxdom_find_port = NULL,
	.nxdom_port_is_reserved = NULL,
	/* port bind/unbind and connect/disconnect are not supported (VERIFY) */
	.nxdom_bind_port = nx_mon_dom_bind_port,
	.nxdom_unbind_port = nx_mon_dom_unbind_port,
	.nxdom_connect = nx_mon_dom_connect,
	.nxdom_disconnect = nx_mon_dom_disconnect,
	.nxdom_defunct = nx_mon_dom_defunct,
	.nxdom_defunct_finalize = nx_mon_dom_defunct_finalize,
};
277
/*
 * Default (built-in) domain provider for the monitor nexus; registered
 * with the domain by nx_mon_dom_init().
 */
static struct kern_nexus_domain_provider nx_monitor_prov_s = {
	.nxdom_prov_name = NEXUS_PROVIDER_MONITOR,
	.nxdom_prov_flags = NXDOMPROVF_DEFAULT,
	.nxdom_prov_cb = {
		.dp_cb_init = nx_mon_prov_init,
		.dp_cb_fini = nx_mon_prov_fini,
		.dp_cb_params = nx_mon_prov_params,
		.dp_cb_mem_new = nx_mon_prov_mem_new,
		.dp_cb_config = NULL,
		.dp_cb_nx_ctor = NULL,
		.dp_cb_nx_dtor = NULL,
		.dp_cb_nx_mem_info = NULL, /* not supported */
		.dp_cb_nx_mib_get = NULL,
	},
};
293
/* Zone for struct nexus_monitor_adapter allocations; zeroed on free. */
static ZONE_DEFINE(na_mon_zone, SKMEM_ZONE_PREFIX ".na.mon",
    sizeof(struct nexus_monitor_adapter), ZC_ZFREE_CLEARMEM);

/* Allocation tag for the per-kring arrays of monitor ring pointers. */
#define SKMEM_TAG_MONITORS "com.apple.skywalk.monitors"
static SKMEM_TAG_DEFINE(skmem_tag_monitors, SKMEM_TAG_MONITORS);
299
300 static void
nx_mon_dom_init(struct nxdom * nxdom)301 nx_mon_dom_init(struct nxdom *nxdom)
302 {
303 SK_LOCK_ASSERT_HELD();
304 ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED));
305
306 (void) nxdom_prov_add(nxdom, &nx_monitor_prov_s);
307 }
308
/*
 * Domain terminate callback: unregister every provider attached to the
 * monitor domain.  Uses the _SAFE iterator because nxdom_prov_del()
 * unlinks the current element from the list.
 */
static void
nx_mon_dom_terminate(struct nxdom *nxdom)
{
	struct kern_nexus_domain_provider *nxdom_prov, *tnxdp;

	STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head,
	    nxdom_prov_link, tnxdp) {
		(void) nxdom_prov_del(nxdom_prov);
	}
}
319
/* Domain fini callback: nothing to tear down for the monitor domain. */
static void
nx_mon_dom_fini(struct nxdom *nxdom)
{
#pragma unused(nxdom)
}
325
/*
 * Monitors have no nexus ports of their own; binding one is a
 * programming error, hence the unconditional VERIFY(0) panic.
 */
__attribute__((noreturn))
static int
nx_mon_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port,
    struct nxbind *nxb, void *info)
{
#pragma unused(nx, nx_port, nxb, info)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
336
/* Counterpart of nx_mon_dom_bind_port(); must never be called. */
__attribute__((noreturn))
static int
nx_mon_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port)
{
#pragma unused(nx, nx_port)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
346
/*
 * Channels never connect to a monitor through the generic domain path;
 * monitor attachment happens via the monitored adapter instead, so any
 * call here is a programming error.
 */
__attribute__((noreturn))
static int
nx_mon_dom_connect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
{
#pragma unused(nxdom_prov, nx, ch, chr, ch0, nxb, p)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
358
/* Counterpart of nx_mon_dom_connect(); must never be called. */
__attribute__((noreturn))
static void
nx_mon_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nxdom_prov, nx, ch)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
369
/* Defunct callback: no per-channel state to tear down for monitors. */
static void
nx_mon_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct proc *p)
{
#pragma unused(nxdom_prov, nx, ch, p)
}
376
/* Defunct-finalize callback: nothing to finalize for monitors. */
static void
nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked)
{
#pragma unused(nxdom_prov, nx, ch, locked)
}
383
/*
 * Provider init callback; logging only.  The pragma silences the
 * unused-parameter warning on builds where SK_D() compiles out.
 */
static int
nx_mon_prov_init(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
	SK_D("initializing %s", nxdom_prov->nxdom_prov_name);
	return 0;
}
391
/*
 * Per-domain parameter adjustment hook passed to nxprov_params_adjust();
 * monitors accept the defaults unchanged.
 */
static int
nx_mon_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov,
    const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj)
{
#pragma unused(nxdom_prov, nxp, adj)

	return 0;
}
400
/*
 * Validate/adjust provider params and fill in the memory region
 * parameters, delegating to the common nxprov_params_adjust() with the
 * monitor-specific adjust hook.  The monitor domain supplies the ring
 * and slot limits for all three (tx/rx/allocator) roles.
 */
static int
nx_mon_prov_params(struct kern_nexus_domain_provider *nxdom_prov,
    const uint32_t req, const struct nxprov_params *nxp0,
    struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS])
{
	struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom;

	return nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp,
	    nxdom, nxdom, nxdom, nx_mon_prov_params_adjust);
}
411
/*
 * Create the memory arena backing the monitor adapter.
 *
 * Returns 0 on success, or a non-zero errno from arena creation; on
 * success na->na_arena is set (asserted below).
 */
static int
nx_mon_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct nexus_adapter *na)
{
#pragma unused(nxdom_prov)
	int err = 0;

	SK_DF(SK_VERB_MONITOR,
	    "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx),
	    NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name,
	    SK_KVA(na));

	ASSERT(na->na_arena == NULL);
	ASSERT(NX_USER_CHANNEL_PROV(nx));
	/*
	 * The underlying nexus adapter uses the same memory allocator
	 * as the monitored adapter; don't store the pp in the nexus.
	 *
	 * This means that clients calling kern_nexus_get_pbufpool()
	 * will get NULL, but this is fine since we don't expose the
	 * monitor to external kernel clients.
	 */
	na->na_arena = skmem_arena_create_for_nexus(na,
	    NX_PROV(nx)->nxprov_region_params, NULL, NULL, FALSE,
	    FALSE, NULL, &err);
	ASSERT(na->na_arena != NULL || err != 0);

	return err;
}
441
/* Provider fini callback; logging only (see nx_mon_prov_init). */
static void
nx_mon_prov_fini(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
	SK_D("destroying %s", nxdom_prov->nxdom_prov_name);
}
448
449 static struct nexus_monitor_adapter *
na_mon_alloc(zalloc_flags_t how)450 na_mon_alloc(zalloc_flags_t how)
451 {
452 struct nexus_monitor_adapter *mna;
453
454 _CASSERT(offsetof(struct nexus_monitor_adapter, mna_up) == 0);
455
456 mna = zalloc_flags(na_mon_zone, how | Z_ZERO);
457 if (mna) {
458 mna->mna_up.na_type = NA_MONITOR;
459 mna->mna_up.na_free = na_mon_free;
460 }
461 return mna;
462 }
463
/*
 * na_free callback: return a fully-released monitor adapter to its zone.
 * The explicit bzero is belt-and-suspenders on top of ZC_ZFREE_CLEARMEM.
 */
static void
na_mon_free(struct nexus_adapter *na)
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;

	ASSERT(mna->mna_up.na_refcount == 0);
	SK_DF(SK_VERB_MEM, "mna 0x%llx FREE", SK_KVA(mna));
	bzero(mna, sizeof(*mna));
	zfree(na_mon_zone, mna);
}
474
475 /*
476 * Functions common to both kind of monitors.
477 */
478
479 /*
480 * nm_sync callback for the monitor's own tx rings.
481 * This makes no sense and always returns error
482 */
483 static int
nx_mon_na_txsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)484 nx_mon_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
485 uint32_t flags)
486 {
487 #pragma unused(kring, p, flags)
488 SK_DF(SK_VERB_MONITOR | SK_VERB_SYNC | SK_VERB_TX,
489 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
490 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
491 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
492 flags);
493 return EIO;
494 }
495
496 /*
497 * nm_sync callback for the monitor's own rx rings.
498 * Note that the lock in nx_mon_zcopy_parent_sync only protects
499 * writers among themselves. Synchronization between writers
500 * (i.e., nx_mon_zcopy_parent_txsync and nx_mon_zcopy_parent_rxsync)
501 * and readers (i.e., nx_mon_zcopy_parent_rxsync) relies on memory barriers.
502 */
503 static int
nx_mon_na_rxsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)504 nx_mon_na_rxsync(struct __kern_channel_ring *kring, struct proc *p,
505 uint32_t flags)
506 {
507 #pragma unused(p, flags)
508 SK_DF(SK_VERB_MONITOR | SK_VERB_SYNC | SK_VERB_RX,
509 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
510 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
511 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
512 flags);
513 kring->ckr_khead = kring->ckr_rhead;
514 membar_sync();
515 return 0;
516 }
517
518 /*
519 * na_krings_create callbacks for monitors.
520 * We could use the default netmap_hw_krings_zmon, but
521 * we don't need the nx_mbq.
522 */
523 static int
nx_mon_na_krings_create(struct nexus_adapter * na,struct kern_channel * ch)524 nx_mon_na_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
525 {
526 ASSERT(na->na_type == NA_MONITOR);
527 return na_rings_mem_setup(na, 0, FALSE, ch);
528 }
529
/* na_krings_delete callback for monitors: undo na_rings_mem_setup(). */
static void
nx_mon_na_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	ASSERT(na->na_type == NA_MONITOR);
	na_rings_mem_teardown(na, ch, defunct);
}
538
539 __attribute__((always_inline))
540 static inline uint32_t
nx_mon_txrx2chmode(enum txrx t)541 nx_mon_txrx2chmode(enum txrx t)
542 {
543 return t == NR_RX ? CHMODE_MONITOR_RX : CHMODE_MONITOR_TX;
544 }
545
/*
 * Grow (never shrink) the monitors array of the monitored kring so it
 * can hold at least n entries.  Returns 0 on success or ENOMEM.
 */
static int
nx_mon_kr_alloc(struct __kern_channel_ring *kring, uint32_t n)
{
	struct __kern_channel_ring **nm;

	if (n <= kring->ckr_max_monitors) {
		/* we already have at least as many entries as requested */
		return 0;
	}

	/* realloc preserves the existing ckr_max_monitors entries */
	nm = sk_realloc_type_array(struct __kern_channel_ring *,
	    kring->ckr_max_monitors, n, kring->ckr_monitors,
	    Z_WAITOK, skmem_tag_monitors);
	if (nm == NULL) {
		return ENOMEM;
	}

	kring->ckr_monitors = nm;
	kring->ckr_max_monitors = n;

	return 0;
}
569
/*
 * Free the monitors array of the monitored kring.  The array is
 * expected to be empty by now; dangling entries are logged but the
 * memory is released regardless.
 */
static void
nx_mon_kr_dealloc(struct __kern_channel_ring *kring)
{
	if (kring->ckr_monitors != NULL) {
		if (kring->ckr_n_monitors > 0) {
			SK_ERR("freeing not empty monitor array for \"%s\" "
			    "(%u dangling monitors)!", kring->ckr_name,
			    kring->ckr_n_monitors);
		}
		sk_free_type_array(struct __kern_channel_ring *,
		    kring->ckr_max_monitors, kring->ckr_monitors);
		kring->ckr_monitors = NULL;
		kring->ckr_max_monitors = 0;
		kring->ckr_n_monitors = 0;
	}
}
587
/*
 * Stop (acquire) the monitored adapter's krings covered by this
 * monitor's mode and ring span, recording the acquired range in
 * qfirst/qlast so nx_mon_na_krings_unlock() can release exactly what
 * was taken.
 *
 * NOTE(review): err is never assigned after its initialization, so the
 * "if (err != 0) break;" path is dead and this function currently always
 * returns 0 — presumably kr_stop() cannot fail here; confirm before
 * relying on a non-zero return.
 */
static int
nx_mon_na_krings_locks(struct nexus_adapter *na,
    uint32_t qfirst[NR_TXRX], uint32_t qlast[NR_TXRX])
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
	struct nexus_adapter *pna = mna->mna_pna;
	enum txrx t;
	int err = 0;

	for_rx_tx(t) {
		uint32_t i;

		/* skip directions this monitor was not asked to intercept */
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		/* empty range until a ring is actually acquired */
		qfirst[t] = qlast[t] = mna->mna_first[t];

		/* synchronize with concurrently running nm_sync()s */
		for (i = mna->mna_first[t]; i < mna->mna_last[t]; i++) {
			struct __kern_channel_ring *kring;

			/* the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			kr_stop(kring, KR_LOCKED);
			qlast[t] = i + 1;
		}
		if (err != 0) {
			break;
		}
	}

	return err;
}
622
/*
 * Release the monitored adapter's krings previously acquired by
 * nx_mon_na_krings_locks(); qfirst/qlast delimit exactly the rings
 * that were stopped.
 */
static void
nx_mon_na_krings_unlock(struct nexus_adapter *na,
    const uint32_t qfirst[NR_TXRX], const uint32_t qlast[NR_TXRX])
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
	struct nexus_adapter *pna = mna->mna_pna;
	enum txrx t;

	for_rx_tx(t) {
		uint32_t i;

		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		/* synchronize with concurrently running nm_sync()s */
		for (i = qfirst[t]; i < qlast[t]; i++) {
			struct __kern_channel_ring *kring;

			/* the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			kr_start(kring);
		}
	}
}
648
/*
 * Activate a monitor: pick the packet-copy routine matching the parent's
 * pool metadata, then attach this monitor's rx rings to each monitored
 * tx/rx kring via nx_mon_add().  On partial failure, every ring added so
 * far is detached again before returning.
 *
 * zcopy selects between the zero-copy and copy-based parent callbacks.
 * Returns 0 on success or the first error from nx_mon_add().
 */
static int
nx_mon_enable(struct nexus_adapter *na, boolean_t zcopy)
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
	struct nexus_adapter *pna = mna->mna_pna;
	struct skmem_arena_nexus *na_arena = skmem_arena_nexus(pna->na_arena);
	uint32_t qfirst[NR_TXRX], qlast[NR_TXRX];
	enum txrx t;
	int err = 0;
	uint32_t i;

	ASSERT(!(na->na_flags & NAF_ACTIVE));

	bzero(&qfirst, sizeof(qfirst));
	bzero(&qlast, sizeof(qlast));

	/*
	 * Acquire the target kring(s).  q{first,last} record the rings
	 * actually acquired, so that on failure (and on the way out) we
	 * release only previously-acquired rings.
	 */
	if ((err = nx_mon_na_krings_locks(na, qfirst, qlast)) != 0) {
		goto unlock;
	}

	/*
	 * Select the copy routine once, based on the parent pool's
	 * buflet count and metadata type (rx and tx pools are shared).
	 */
	ASSERT(na_arena->arn_rx_pp == na_arena->arn_tx_pp);
	if (na_arena->arn_rx_pp->pp_max_frags > 1) {
		VERIFY(na_arena->arn_rx_pp->pp_md_type == NEXUS_META_TYPE_PACKET);
		mna->mna_pkt_copy_from_pkt = pkt_copy_multi_buflet_from_pkt;
	} else {
		if (na_arena->arn_rx_pp->pp_md_type == NEXUS_META_TYPE_PACKET) {
			mna->mna_pkt_copy_from_pkt = pkt_copy_from_pkt;
		} else {
			mna->mna_pkt_copy_from_pkt = nx_mon_quantum_copy_64x;
		}
	}

	/* hook each monitored kring up to the matching monitor rx ring */
	for_rx_tx(t) {
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		for (i = qfirst[t]; i < qlast[t]; i++) {
			struct __kern_channel_ring *kring, *mkring;

			/* the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			mkring = &na->na_rx_rings[i];
			err = nx_mon_add(mkring, kring, zcopy);
			if (err != 0) {
				break;
			}
		}
		if (err != 0) {
			break;
		}
	}

	if (err == 0) {
		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
		goto unlock;
	}

	/*
	 * Rollback: detach from every ring in the acquired span.
	 * nx_mon_del() on a ring that was never added is avoided because
	 * the spans only cover rings already processed above.
	 */
	for_rx_tx(t) {
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		for (i = qfirst[t]; i < qlast[t]; i++) {
			struct __kern_channel_ring *kring, *mkring;

			/* the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			mkring = &na->na_rx_rings[i];
			nx_mon_del(mkring, kring, FALSE);
		}
	}
	ASSERT(!(na->na_flags & NAF_ACTIVE));

unlock:
	nx_mon_na_krings_unlock(na, qfirst, qlast);

	SK_DF(err ? SK_VERB_ERROR : SK_VERB_MONITOR,
	    "%s (0x%llx): mode 0x%x txrings[%u,%u], rxrings[%u,%u] err %d",
	    na->na_name, SK_KVA(na), mna->mna_mode, qfirst[NR_TX], qlast[NR_TX],
	    qfirst[NR_RX], qlast[NR_RX], err);

	return err;
}
739
/*
 * Deactivate a monitor: detach its rx rings from every monitored kring
 * and clear NAF_ACTIVE.  The kring acquisition here is blocking and is
 * asserted not to fail (see note in nx_mon_na_krings_locks()).
 */
static void
nx_mon_disable(struct nexus_adapter *na)
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
	struct nexus_adapter *pna = mna->mna_pna;
	uint32_t qfirst[NR_TXRX], qlast[NR_TXRX];
	enum txrx t;
	int err;
	uint32_t i;

	ASSERT(na->na_flags & NAF_ACTIVE);

	bzero(&qfirst, sizeof(qfirst));
	bzero(&qlast, sizeof(qlast));

	/* blocking kring(s) acquisition; must not fail */
	err = nx_mon_na_krings_locks(na, qfirst, qlast);
	ASSERT(err == 0);
	mna->mna_pkt_copy_from_pkt = NULL;
	for_rx_tx(t) {
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		for (i = qfirst[t]; i < qlast[t]; i++) {
			struct __kern_channel_ring *kring, *mkring;

			kring = &NAKR(pna, t)[i];
			mkring = &na->na_rx_rings[i];
			nx_mon_del(mkring, kring, FALSE);
		}
	}
	atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);

	nx_mon_na_krings_unlock(na, qfirst, qlast);
}
776
777 /*
778 * Add the monitor mkring to the list of monitors of kring.
779 * If this is the first monitor, intercept the callbacks
780 */
781 static int
nx_mon_add(struct __kern_channel_ring * mkring,struct __kern_channel_ring * kring,boolean_t zcopy)782 nx_mon_add(struct __kern_channel_ring *mkring,
783 struct __kern_channel_ring *kring, boolean_t zcopy)
784 {
785 int error;
786
787 /* make sure the monitor array exists and is big enough */
788 error = nx_mon_kr_alloc(kring, kring->ckr_n_monitors + 1);
789 if (error != 0) {
790 return error;
791 }
792
793 kring->ckr_monitors[kring->ckr_n_monitors] = mkring;
794 mkring->ckr_mon_pos = kring->ckr_n_monitors;
795 kring->ckr_n_monitors++;
796 if (kring->ckr_n_monitors == 1) {
797 /* this is the first monitor, intercept callbacks */
798 SK_DF(SK_VERB_MONITOR,
799 "mkr \"%s\" (0x%llx) krflags 0x%b intercept callbacks "
800 "on kr \"%s\" (0x%llx) krflags 0x%b", mkring->ckr_name,
801 SK_KVA(mkring), mkring->ckr_flags, CKRF_BITS,
802 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
803 CKRF_BITS);
804 kring->ckr_mon_sync = kring->ckr_na_sync;
805 /*
806 * zcopy monitors do not override nm_notify(), but
807 * we save the original one regardless, so that
808 * nx_mon_del() does not need to know the
809 * monitor type
810 */
811 kring->ckr_mon_notify = kring->ckr_na_notify;
812 if (kring->ckr_tx == NR_TX) {
813 kring->ckr_na_sync =
814 (zcopy ? nx_mon_zcopy_parent_txsync :
815 nx_mon_parent_txsync);
816 } else {
817 kring->ckr_na_sync =
818 (zcopy ? nx_mon_zcopy_parent_rxsync :
819 nx_mon_parent_rxsync);
820 if (!zcopy) {
821 /* also intercept notify */
822 kring->ckr_na_notify = nx_mon_parent_notify;
823 kring->ckr_mon_tail = kring->ckr_ktail;
824 }
825 }
826 } else {
827 SK_DF(SK_VERB_MONITOR,
828 "mkr \"%s\" (0x%llx) krflags 0x%b already intercept "
829 "callbacks on kr \"%s\" (0x%llx) krflags 0x%b, "
830 "%u monitors", mkring->ckr_name, SK_KVA(mkring),
831 mkring->ckr_flags, CKRF_BITS, kring->ckr_name,
832 SK_KVA(kring), kring->ckr_flags, CKRF_BITS,
833 kring->ckr_n_monitors);
834 }
835 return 0;
836 }
837
838 /*
839 * Remove the monitor mkring from the list of monitors of kring.
840 * If this is the last monitor, restore the original callbacks
841 */
842 static void
nx_mon_del(struct __kern_channel_ring * mkring,struct __kern_channel_ring * kring,boolean_t all)843 nx_mon_del(struct __kern_channel_ring *mkring,
844 struct __kern_channel_ring *kring, boolean_t all)
845 {
846 ASSERT(kring->ckr_n_monitors != 0);
847 if (all) {
848 kring->ckr_n_monitors = 0;
849 } else {
850 kring->ckr_n_monitors--;
851 if (mkring->ckr_mon_pos != kring->ckr_n_monitors) {
852 kring->ckr_monitors[mkring->ckr_mon_pos] =
853 kring->ckr_monitors[kring->ckr_n_monitors];
854 kring->ckr_monitors[mkring->ckr_mon_pos]->ckr_mon_pos =
855 mkring->ckr_mon_pos;
856 }
857 kring->ckr_monitors[kring->ckr_n_monitors] = NULL;
858 }
859 if (kring->ckr_n_monitors == 0) {
860 /*
861 * This was the last monitor, restore callbacks
862 * and delete monitor array.
863 */
864 SK_DF(SK_VERB_MONITOR,
865 "restoring sync callback on kr \"%s\" (0x%llx) "
866 "krflags 0x%b", kring->ckr_name, SK_KVA(kring),
867 kring->ckr_flags, CKRF_BITS);
868 kring->ckr_na_sync = kring->ckr_mon_sync;
869 kring->ckr_mon_sync = NULL;
870 if (kring->ckr_tx == NR_RX) {
871 SK_DF(SK_VERB_MONITOR,
872 "restoring notify callback on kr \"%s\" (0x%llx) "
873 "krflags 0x%b", kring->ckr_name, SK_KVA(kring),
874 kring->ckr_flags, CKRF_BITS);
875 kring->ckr_na_notify = kring->ckr_mon_notify;
876 kring->ckr_mon_notify = NULL;
877 }
878 nx_mon_kr_dealloc(kring);
879 } else {
880 SK_DF(SK_VERB_MONITOR,
881 "NOT restoring callbacks on kr \"%s\" (0x%llx) "
882 "krflags 0x%b, %u monitors left", kring->ckr_name,
883 SK_KVA(kring), kring->ckr_flags, CKRF_BITS,
884 kring->ckr_n_monitors);
885 }
886 }
887
888 /*
889 * This is called when the monitored adapter leaves skywalk mode (see
890 * na_unbind_channel). We need to notify the monitors that the monitored
891 * rings are gone. We do this by setting their mna->mna_pna to NULL.
892 * Note that the rings must be stopped when this happens, so no monitor
893 * ring callback can be active.
894 */
void
nx_mon_stop(struct nexus_adapter *na)
{
	enum txrx t;

	SK_LOCK_ASSERT_HELD();

	/* skip if this adapter has no allocated rings */
	if (na->na_tx_rings == NULL) {
		return;
	}

	/* quiesce the rings so no monitor callback can run concurrently */
	na_disable_all_rings(na);

	/* walk both directions: monitors may be attached to TX and RX rings */
	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
			uint32_t j;

			for (j = 0; j < kring->ckr_n_monitors; j++) {
				struct __kern_channel_ring *mkring =
				    kring->ckr_monitors[j];
				struct nexus_monitor_adapter *mna =
				    (struct nexus_monitor_adapter *)
				    KRNA(mkring);

				/* forget about this adapter */
				if (mna->mna_pna != NULL) {
					ASSERT(na == mna->mna_pna);
					(void) na_release_locked(mna->mna_pna);
					mna->mna_pna = NULL;
				}
			}

			/*
			 * Remove all monitors and restore callbacks;
			 * this is important for nexus adapters that
			 * are linked to one another, e.g. pipe, since
			 * the callback changes on one adapter affects
			 * its peer during sync times.
			 */
			if (kring->ckr_n_monitors > 0) {
				/* all == TRUE, so mkring may be NULL */
				nx_mon_del(NULL, kring, TRUE);
			}

			ASSERT(kring->ckr_monitors == NULL);
			ASSERT(kring->ckr_max_monitors == 0);
			ASSERT(kring->ckr_n_monitors == 0);
		}
	}

	na_enable_all_rings(na);
}
950
951 /*
952 * Common functions for the na_activate() callbacks of both kind of
953 * monitors.
954 */
955 static int
nx_mon_na_activate_common(struct nexus_adapter * na,na_activate_mode_t mode,boolean_t zcopy)956 nx_mon_na_activate_common(struct nexus_adapter *na, na_activate_mode_t mode,
957 boolean_t zcopy)
958 {
959 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
960 struct nexus_adapter *pna = mna->mna_pna;
961 int err = 0;
962
963 ASSERT(na->na_type == NA_MONITOR);
964
965 SK_DF(SK_VERB_MONITOR, "na \"%s\" (0x%llx) %s zcopy %u", na->na_name,
966 SK_KVA(na), na_activate_mode2str(mode), zcopy);
967
968 switch (mode) {
969 case NA_ACTIVATE_MODE_ON:
970 if (pna == NULL) {
971 /* parent left skywalk mode, fatal */
972 SK_ERR("%s: internal error", na->na_name);
973 err = ENXIO;
974 } else {
975 err = nx_mon_enable(na, zcopy);
976 }
977 break;
978
979 case NA_ACTIVATE_MODE_DEFUNCT:
980 break;
981
982 case NA_ACTIVATE_MODE_OFF:
983 if (pna == NULL) {
984 SK_DF(SK_VERB_MONITOR, "%s: parent left skywalk mode, "
985 "nothing to restore", na->na_name);
986 } else {
987 nx_mon_disable(na);
988 }
989 break;
990
991 default:
992 VERIFY(0);
993 /* NOTREACHED */
994 __builtin_unreachable();
995 }
996
997 return err;
998 }
999
1000 /*
1001 * Functions specific for zero-copy monitors.
1002 */
1003
1004 /*
1005 * Common function for both zero-copy tx and rx nm_sync()
1006 * callbacks
1007 */
static int
nx_mon_zcopy_parent_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags, enum txrx tx)
{
	/* zero-copy monitors are exclusive, so only entry 0 is in use */
	struct __kern_channel_ring *mkring = kring->ckr_monitors[0];
	int rel_slots, free_slots, busy, sent = 0;
	slot_idx_t beg, end, i;
	const slot_idx_t lim = kring->ckr_lim;
	const slot_idx_t mlim;
	int error = 0;

	if (mkring == NULL) {
		SK_RD(5, "NULL monitor on kr \"%s\" (0x%llx) krflags 0x%b",
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    CKRF_BITS);
		return 0;
	}

	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!KR_KERNEL_ONLY(mkring));

	/* deconst */
	*(slot_idx_t *)(uintptr_t)&mlim = mkring->ckr_lim;

	/* get the released slots (rel_slots) */
	if (tx == NR_TX) {
		/* slots added by the txsync: [old ktail, new ktail) */
		beg = kring->ckr_ktail;
		error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
		if (error) {
			return error;
		}
		end = kring->ckr_ktail;
	} else { /* NR_RX */
		/* slots the user has returned: [khead, rhead) */
		beg = kring->ckr_khead;
		end = kring->ckr_rhead;
	}

	rel_slots = end - beg;
	if (rel_slots < 0) {
		/* ring index wrapped around */
		rel_slots += kring->ckr_num_slots;
	}

	if (!rel_slots) {
		/*
		 * No released slots, but we still need
		 * to call rxsync if this is a rx ring
		 */
		goto out_rxsync;
	}

	/*
	 * We need to lock the monitor receive ring, since it
	 * is the target of both tx and rx traffic from the monitored
	 * adapter
	 */
	KR_LOCK(mkring);
	/* get the free slots available on the monitor ring */
	i = mkring->ckr_ktail;
	busy = i - mkring->ckr_khead;
	if (busy < 0) {
		busy += mkring->ckr_num_slots;
	}
	free_slots = mlim - busy;

	if (!free_slots) {
		goto out;
	}

	/* swap min(free_slots, rel_slots) slots */
	if (free_slots < rel_slots) {
		/* not enough room: advance beg, dropping the oldest slots */
		beg += (rel_slots - free_slots);
		if (beg >= kring->ckr_num_slots) {
			beg -= kring->ckr_num_slots;
		}
		rel_slots = free_slots;
	}

	sent = rel_slots;
	for (; rel_slots; rel_slots--) {
		/*
		 * Swap the slots.
		 *
		 * XXX: [email protected] -- this bypasses the slot attach/detach
		 * interface, and needs to be changed when monitor adopts the
		 * packet APIs. SD_SWAP() will perform a block copy of the
		 * swap, and will readjust the kernel slot descriptor's sd_user
		 * accordingly.
		 */
		SD_SWAP(KR_KSD(mkring, i), KR_USD(mkring, i),
		    KR_KSD(kring, beg), KR_USD(kring, beg));

		SK_RD(5, "beg %u buf_idx %u", beg,
		    METADATA_IDX(KR_KSD(kring, beg)->sd_qum));

		beg = SLOT_NEXT(beg, lim);
		i = SLOT_NEXT(i, mlim);
	}
	/* make the swapped slots visible before publishing the new tail */
	membar_sync();
	mkring->ckr_ktail = i;

out:
	KR_UNLOCK(mkring);

	if (sent) {
		/* notify the new frames to the monitor */
		(void) mkring->ckr_na_notify(mkring, p, 0);
	}

out_rxsync:
	if (tx == NR_RX) {
		error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
	}

	return error;
}
1123
1124 /*
1125 * Callback used to replace the ckr_na_sync callback in the monitored tx rings.
1126 */
1127 static int
nx_mon_zcopy_parent_txsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1128 nx_mon_zcopy_parent_txsync(struct __kern_channel_ring *kring, struct proc *p,
1129 uint32_t flags)
1130 {
1131 SK_DF(SK_VERB_MONITOR,
1132 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x",
1133 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1134 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1135 return nx_mon_zcopy_parent_sync(kring, p, flags, NR_TX);
1136 }
1137
1138 /* callback used to replace the nm_sync callback in the monitored rx rings */
1139 static int
nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1140 nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring *kring, struct proc *p,
1141 uint32_t flags)
1142 {
1143 SK_DF(SK_VERB_MONITOR,
1144 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x",
1145 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1146 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1147 return nx_mon_zcopy_parent_sync(kring, p, flags, NR_RX);
1148 }
1149
1150 static int
nx_mon_zcopy_na_activate(struct nexus_adapter * na,na_activate_mode_t mode)1151 nx_mon_zcopy_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1152 {
1153 return nx_mon_na_activate_common(na, mode, TRUE /* zcopy */);
1154 }
1155
1156 /* na_dtor callback for monitors */
1157 static void
nx_mon_zcopy_na_dtor(struct nexus_adapter * na)1158 nx_mon_zcopy_na_dtor(struct nexus_adapter *na)
1159 {
1160 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
1161 struct nexus_adapter *pna = mna->mna_pna;
1162
1163 SK_LOCK_ASSERT_HELD();
1164 ASSERT(na->na_type == NA_MONITOR);
1165
1166 if (pna != NULL) {
1167 (void) na_release_locked(pna);
1168 mna->mna_pna = NULL;
1169 }
1170 }
1171
1172 /*
1173 * Functions specific for copy monitors.
1174 */
1175
/*
 * Copy the slots in [first_new, first_new + new_slots) of kring into
 * each attached monitor ring, then notify those monitors.
 */
static void
nx_mon_parent_sync(struct __kern_channel_ring *kring, struct proc *p,
    slot_idx_t first_new, int new_slots)
{
	nexus_meta_type_t md_type = KRNA(kring)->na_md_type;
	uint32_t j;

	for (j = 0; j < kring->ckr_n_monitors; j++) {
		struct __kern_channel_ring *mkring = kring->ckr_monitors[j];
		slot_idx_t i, mlim, beg;
		int free_slots, busy, sent = 0, m;
		const slot_idx_t lim = kring->ckr_lim;
		struct nexus_adapter *dst_na = KRNA(mkring);
		struct nexus_monitor_adapter *mna =
		    (struct nexus_monitor_adapter *)dst_na;
		/* most bytes a destination packet can hold (used to truncate) */
		uint32_t max_len = mkring->ckr_pp->pp_max_frags *
		    mkring->ckr_pp->pp_buflet_size;

		/*
		 * src and dst adapters must share the same nexus;
		 * this test is done in nx_monitor_na_find(). This
		 * covers both buffer and metadata sizes.
		 */

		mlim = mkring->ckr_lim;

		/*
		 * We need to lock the monitor receive ring, since it
		 * is the target of both tx and rx traffics from the
		 * monitored adapter.
		 */
		KR_LOCK(mkring);
		/* get the free slots available on the monitor ring */
		i = mkring->ckr_ktail;
		busy = i - mkring->ckr_khead;
		if (busy < 0) {
			/* ring index wrapped around */
			busy += mkring->ckr_num_slots;
		}
		free_slots = mlim - busy;

		if (!free_slots) {
			goto out;
		}

		/* copy min(free_slots, new_slots) slots */
		m = new_slots;
		beg = first_new;
		if (free_slots < m) {
			/* not enough room: skip over the oldest new slots */
			beg += (m - free_slots);
			if (beg >= kring->ckr_num_slots) {
				beg -= kring->ckr_num_slots;
			}
			m = free_slots;
		}

		ASSERT(KRNA(mkring)->na_md_type == md_type);

		for (; m; m--) {
			struct __kern_slot_desc *src_sd = KR_KSD(kring, beg);
			struct __kern_slot_desc *dst_sd = KR_KSD(mkring, i);
			struct __kern_packet *spkt, *dpkt;
			kern_packet_t sph, dph;
			uint32_t copy_len;

			if (!KSD_VALID_METADATA(src_sd)) {
				/* source slot has no packet attached; skip */
				goto skip;
			}

			/* retrieve packet handles from slot */
			spkt = src_sd->sd_pkt;
			sph = SK_PTR_ENCODE(spkt, METADATA_TYPE(spkt),
			    METADATA_SUBTYPE(spkt));
			dpkt = dst_sd->sd_pkt;
			dph = SK_PTR_ENCODE(dpkt, METADATA_TYPE(dpkt),
			    METADATA_SUBTYPE(dpkt));

			ASSERT(METADATA_TYPE(spkt) == METADATA_TYPE(dpkt));

			/* ensure the rounding below cannot overflow */
			ASSERT(spkt->pkt_qum.qum_len <= (UINT32_MAX - 63));
			copy_len = spkt->pkt_qum.qum_len;

			/* round to a multiple of 64 */
			copy_len = (copy_len + 63) & ~63;

			if (__improbable(copy_len > max_len)) {
				SK_RD(5, "kr \"%s\" -> mkr \"%s\": "
				    "truncating %u to %u",
				    kring->ckr_name, mkring->ckr_name,
				    (uint32_t)copy_len, max_len);
				copy_len = max_len;
			}

			/* copy buffers */
			mna->mna_pkt_copy_from_pkt(kring->ckr_tx, dph, 0, sph,
			    0, copy_len, FALSE, 0, 0, FALSE);

			/* copy the associated meta data */
			_QUM_COPY(&(spkt)->pkt_qum, &(dpkt)->pkt_qum);
			if (md_type == NEXUS_META_TYPE_PACKET) {
				_PKT_COPY(spkt, dpkt);
				ASSERT(dpkt->pkt_mbuf == NULL);
			}

			ASSERT(!(dpkt->pkt_qum.qum_qflags & QUM_F_KERNEL_ONLY) ||
			    PP_KERNEL_ONLY(dpkt->pkt_qum.qum_pp));

			sent++;
			i = SLOT_NEXT(i, mlim);
skip:
			beg = SLOT_NEXT(beg, lim);
		}
		/* make the copied slots visible before publishing the tail */
		membar_sync();
		mkring->ckr_ktail = i;
out:
		KR_UNLOCK(mkring);

		if (sent) {
			/* notify the new frames to the monitor */
			(void) mkring->ckr_na_notify(mkring, p, 0);
		}
	}
}
1298
1299 /* callback used to replace the nm_sync callback in the monitored tx rings */
1300 static int
nx_mon_parent_txsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1301 nx_mon_parent_txsync(struct __kern_channel_ring *kring, struct proc *p,
1302 uint32_t flags)
1303 {
1304 slot_idx_t first_new;
1305 int new_slots;
1306 nexus_type_t nx_type =
1307 kring->ckr_na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
1308
1309 /*
1310 * For user pipe nexus, txsync can also be initated from RX process
1311 * context, hence user pipe tx ring should be accessed holding
1312 * ckr_qlock.
1313 */
1314 if (nx_type == NEXUS_TYPE_USER_PIPE) {
1315 KR_LOCK(kring);
1316 }
1317
1318 /* get the new slots */
1319 first_new = kring->ckr_khead;
1320 new_slots = kring->ckr_rhead - first_new;
1321 if (new_slots < 0) {
1322 new_slots += kring->ckr_num_slots;
1323 }
1324 if (new_slots) {
1325 nx_mon_parent_sync(kring, p, first_new, new_slots);
1326 }
1327
1328 if (nx_type == NEXUS_TYPE_USER_PIPE) {
1329 KR_UNLOCK(kring);
1330 }
1331
1332 return kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1333 }
1334
1335 /* callback used to replace the nm_sync callback in the monitored rx rings */
1336 static int
nx_mon_parent_rxsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1337 nx_mon_parent_rxsync(struct __kern_channel_ring *kring, struct proc *p,
1338 uint32_t flags)
1339 {
1340 slot_idx_t first_new;
1341 int new_slots, error;
1342
1343 /* get the new slots */
1344 error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1345 if (error) {
1346 return error;
1347 }
1348 first_new = kring->ckr_mon_tail;
1349 new_slots = kring->ckr_ktail - first_new;
1350 if (new_slots < 0) {
1351 new_slots += kring->ckr_num_slots;
1352 }
1353 if (new_slots) {
1354 nx_mon_parent_sync(kring, p, first_new, new_slots);
1355 }
1356 kring->ckr_mon_tail = kring->ckr_ktail;
1357 return 0;
1358 }
1359
1360 /*
1361 * Callback used to replace the nm_notify() callback in the monitored rx rings
1362 */
1363 static int
nx_mon_parent_notify(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1364 nx_mon_parent_notify(struct __kern_channel_ring *kring, struct proc *p,
1365 uint32_t flags)
1366 {
1367 int err = 0;
1368 sk_protect_t protect = NULL;
1369
1370 SK_DF(SK_VERB_MONITOR | SK_VERB_NOTIFY |
1371 ((kring->ckr_tx == NR_TX) ? SK_VERB_TX : SK_VERB_RX),
1372 "kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x", kring->ckr_name,
1373 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1374 /*
1375 * ?xsync callbacks have tryget called by their callers,
1376 * but here we have to call it by ourself. If we can't
1377 * acquire the exclusive sync right, skip the sync.
1378 */
1379 if ((err = kr_enter(kring, FALSE)) == 0) {
1380 protect = sk_sync_protect();
1381 nx_mon_parent_rxsync(kring, p, NA_SYNCF_FORCE_READ);
1382 sk_sync_unprotect(protect);
1383 kr_exit(kring);
1384 }
1385 /* in all cases (even error), we must invoke notify */
1386 kring->ckr_mon_notify(kring, p, (NA_NOTEF_MONITOR | flags));
1387 return err;
1388 }
1389
1390 static int
nx_mon_na_activate(struct nexus_adapter * na,na_activate_mode_t mode)1391 nx_mon_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1392 {
1393 return nx_mon_na_activate_common(na, mode, FALSE /* no zcopy */);
1394 }
1395
1396 static void
nx_mon_na_dtor(struct nexus_adapter * na)1397 nx_mon_na_dtor(struct nexus_adapter *na)
1398 {
1399 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
1400 struct nexus_adapter *pna = mna->mna_pna;
1401
1402 SK_LOCK_ASSERT_HELD();
1403 ASSERT(na->na_type == NA_MONITOR);
1404
1405 if (pna != NULL) {
1406 (void) na_release_locked(pna);
1407 mna->mna_pna = NULL;
1408 }
1409 }
1410
1411 /* check if chr is a request for a monitor adapter that we can satisfy */
int
nx_monitor_na_find(struct kern_nexus *nx, struct kern_channel *ch,
    struct chreq *chr, struct kern_channel *ch0, struct nxbind *nxb,
    struct proc *p, struct nexus_adapter **na, boolean_t create)
{
#pragma unused(ch)
	boolean_t zcopy = !!(chr->cr_mode & CHMODE_MONITOR_NO_COPY);
	struct nexus_adapter *pna = NULL; /* parent adapter */
	struct nexus_monitor_adapter *mna = NULL;
	char monsuff[10] = "";
	struct chreq pchr;
	uint32_t i;
	int error;
	enum txrx t;

	SK_LOCK_ASSERT_HELD();
	*na = NULL;

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u "
	    "ring_id %d ring_set %u ep_type %u:%u ch0 0x%llx create %u%s",
	    chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr),
	    (int)chr->cr_port, chr->cr_mode, CHMODE_BITS,
	    chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set,
	    chr->cr_real_endpoint, chr->cr_endpoint, SK_KVA(ch0), create,
	    !(chr->cr_mode & CHMODE_MONITOR) ? " (skipped)" : "");
#endif /* SK_LOG */

	/* not a monitor request: nothing to do here, and not an error */
	if (!(chr->cr_mode & CHMODE_MONITOR)) {
		return 0;
	}

	/* XXX: Don't allow user packet pool mode in monitor for now */
	if (chr->cr_mode & CHMODE_USER_PACKET_POOL) {
		SK_ERR("User Packet pool mode not supported for monitor");
		return ENOTSUP;
	}

	mna = na_mon_alloc(Z_WAITOK);

	ASSERT(mna->mna_up.na_type == NA_MONITOR);
	ASSERT(mna->mna_up.na_free == na_mon_free);

	/* override the ring set since we're monitoring */
	chr->cr_ring_set = RING_SET_ALL;

	if (ch0 != NULL) {
		/*
		 * We've been given the owning channel from ch_open();
		 * use this as shortcut since otherwise we'd have to
		 * find it ourselves.
		 */
#if (DEBUG || DEVELOPMENT)
		ASSERT(!(ch0->ch_info->cinfo_ch_mode & CHMODE_MONITOR));
		ASSERT(ch0->ch_info->cinfo_nx_port == chr->cr_port);
#endif /* DEBUG || DEVELOPMENT */
		pna = ch0->ch_na;
		na_retain_locked(pna);
	} else {
		/*
		 * First, try to find the adapter that we want to monitor
		 * We use the same chr, after we have turned off the monitor
		 * flags. In this way we can potentially monitor everything
		 * skywalk understands, except other monitors.
		 */
		memcpy(&pchr, chr, sizeof(pchr));
		pchr.cr_mode &= ~CHMODE_MONITOR;
		error = na_find(ch, nx, &pchr, ch0, nxb, p, &pna, create);
		if (error != 0) {
			SK_ERR("parent lookup failed: %d", error);
			/* mna is leaked here? NOTE(review): every other
			 * failure path frees mna via put_out; confirm
			 * whether this early return should do the same. */
			return error;
		}
	}
	ASSERT(pna != NULL);
	SK_DF(SK_VERB_MONITOR,
	    "found parent: \"%s\" (0x%llx)", pna->na_name, SK_KVA(pna));

	if (!NA_IS_ACTIVE(pna)) {
		/* parent not in skywalk mode */
		/*
		 * XXX we can wait for the parent to enter skywalk mode,
		 * by intercepting its na_activate() callback (2014-03-16)
		 */
		SK_ERR("parent \"%s\" (0x%llx) not in skywalk mode",
		    pna->na_name, SK_KVA(pna));
		error = ENXIO;
		goto put_out;
	} else if (zcopy && NA_KERNEL_ONLY(pna)) {
		/*
		 * Zero-copy mode requires the parent adapter to be
		 * created in a non-kernel-only mode.
		 */
		SK_ERR("parent \"%s\" (0x%llx) is in kernel-only mode",
		    pna->na_name, SK_KVA(pna));
		error = ENODEV;
		goto put_out;
	}

	/* grab all the rings we need in the parent */
	mna->mna_pna = pna;
	error = na_interp_ringid(pna, chr->cr_ring_id, chr->cr_ring_set,
	    mna->mna_first, mna->mna_last);
	if (error != 0) {
		SK_ERR("ring_mode %u ring_id %d error %d", chr->cr_ring_set,
		    (int)chr->cr_ring_id, error);
		goto put_out;
	}
	/* single-ring monitor: append the ring number to the name */
	if (mna->mna_last[NR_TX] - mna->mna_first[NR_TX] == 1) {
		(void) snprintf(monsuff, 10, "-%u", mna->mna_first[NR_TX]);
	}
	(void) snprintf(mna->mna_up.na_name, sizeof(mna->mna_up.na_name),
	    "%s%s/%s%s%s", pna->na_name, monsuff, zcopy ? "z" : "",
	    (chr->cr_mode & CHMODE_MONITOR_TX) ? "r" : "",
	    (chr->cr_mode & CHMODE_MONITOR_RX) ? "t" : "");
	uuid_generate_random(mna->mna_up.na_uuid);

	/* these don't apply to the monitor adapter */
	*(nexus_stats_type_t *)(uintptr_t)&mna->mna_up.na_stats_type =
	    NEXUS_STATS_TYPE_INVALID;
	*(uint32_t *)(uintptr_t)&mna->mna_up.na_flowadv_max = 0;

	if (zcopy) {
		/*
		 * Zero copy monitors need exclusive access
		 * to the monitored rings.
		 */
		for_rx_tx(t) {
			if (!(chr->cr_mode & nx_mon_txrx2chmode(t))) {
				continue;
			}
			for (i = mna->mna_first[t];
			    i < mna->mna_last[t]; i++) {
				struct __kern_channel_ring *kring =
				    &NAKR(pna, t)[i];
				if (kring->ckr_n_monitors > 0) {
					error = EBUSY;
					SK_ERR("kr \"%s\" already monitored "
					    "by \"%s\"", kring->ckr_name,
					    kring->ckr_monitors[0]->ckr_name);
					goto put_out;
				}
			}
		}
		mna->mna_up.na_activate = nx_mon_zcopy_na_activate;
		mna->mna_up.na_dtor = nx_mon_zcopy_na_dtor;
		/*
		 * To have zero copy, we need to use the same memory allocator
		 * as the monitored port.
		 */
		mna->mna_up.na_arena = pna->na_arena;
		skmem_arena_retain((&mna->mna_up)->na_arena);
		atomic_bitset_32(&mna->mna_up.na_flags, NAF_MEM_LOANED);
	} else {
		/* normal monitors are incompatible with zero copy ones */
		for_rx_tx(t) {
			if (!(chr->cr_mode & nx_mon_txrx2chmode(t))) {
				continue;
			}
			for (i = mna->mna_first[t];
			    i < mna->mna_last[t]; i++) {
				struct __kern_channel_ring *kring =
				    &NAKR(pna, t)[i];
				if (kring->ckr_n_monitors > 0 &&
				    KRNA(kring->ckr_monitors[0])->
				    na_activate == nx_mon_zcopy_na_activate) {
					error = EBUSY;
					SK_ERR("kr \"%s\" is busy (zcopy)",
					    kring->ckr_name);
					goto put_out;
				}
			}
		}
		mna->mna_up.na_activate = nx_mon_na_activate;
		mna->mna_up.na_dtor = nx_mon_na_dtor;
		/*
		 * allocate a new (private) allocator instance using the
		 * parent nexus configuration.
		 */
		if ((error = nx_monitor_prov_s.nxdom_prov_mem_new(
		    NX_DOM_PROV(nx), nx, &mna->mna_up)) != 0) {
			ASSERT(mna->mna_up.na_arena == NULL);
			goto put_out;
		}
		ASSERT(mna->mna_up.na_arena != NULL);
		/*
		 * NOTE(review): redundant; na_rxsync is unconditionally
		 * set to nx_mon_na_rxsync again below.
		 */
		mna->mna_up.na_rxsync = nx_mon_na_rxsync;
	}
	/* monitor inherits the parent's metadata type/subtype */
	*(nexus_meta_type_t *)(uintptr_t)&mna->mna_up.na_md_type =
	    pna->na_md_type;
	*(nexus_meta_subtype_t *)(uintptr_t)&mna->mna_up.na_md_subtype =
	    pna->na_md_subtype;

	/* a do-nothing txsync: monitors cannot be used to inject packets */
	mna->mna_up.na_txsync = nx_mon_na_txsync;
	mna->mna_up.na_rxsync = nx_mon_na_rxsync;
	mna->mna_up.na_krings_create = nx_mon_na_krings_create;
	mna->mna_up.na_krings_delete = nx_mon_na_krings_delete;

	/*
	 * We set the number of our na_rx_rings to be
	 * max(na_num_tx_rings, na_num_rx_rings) in the parent
	 */
	na_set_nrings(&mna->mna_up, NR_TX, na_get_nrings(pna, NR_TX));
	na_set_nrings(&mna->mna_up, NR_RX, na_get_nrings(pna, NR_RX));
	if (na_get_nrings(pna, NR_TX) > na_get_nrings(pna, NR_RX)) {
		na_set_nrings(&mna->mna_up, NR_RX, na_get_nrings(pna, NR_TX));
	}
	na_set_nslots(&mna->mna_up, NR_TX, na_get_nslots(pna, NR_TX));
	na_set_nslots(&mna->mna_up, NR_RX, na_get_nslots(pna, NR_RX));

	na_attach_common(&mna->mna_up, nx, &nx_monitor_prov_s);

	/* remember the traffic directions we have to monitor */
	mna->mna_mode = (chr->cr_mode & CHMODE_MONITOR);

	/* keep the reference to the parent */
	*na = &mna->mna_up;
	na_retain_locked(*na);

	/* sanity check: monitor and monitored adapters must share the nexus */
	ASSERT((*na)->na_nx == pna->na_nx);

#if SK_LOG
	SK_DF(SK_VERB_MONITOR, "created monitor adapter 0x%llx", SK_KVA(mna));
	SK_DF(SK_VERB_MONITOR, "na_name: \"%s\"", mna->mna_up.na_name);
	SK_DF(SK_VERB_MONITOR, "  UUID:        %s",
	    sk_uuid_unparse(mna->mna_up.na_uuid, uuidstr));
	SK_DF(SK_VERB_MONITOR, "  nx:          0x%llx (\"%s\":\"%s\")",
	    SK_KVA(mna->mna_up.na_nx), NX_DOM(mna->mna_up.na_nx)->nxdom_name,
	    NX_DOM_PROV(mna->mna_up.na_nx)->nxdom_prov_name);
	SK_DF(SK_VERB_MONITOR, "  flags:       0x%b",
	    mna->mna_up.na_flags, NAF_BITS);
	SK_DF(SK_VERB_MONITOR, "  rings:       tx %u rx %u",
	    na_get_nrings(&mna->mna_up, NR_TX),
	    na_get_nrings(&mna->mna_up, NR_RX));
	SK_DF(SK_VERB_MONITOR, "  slots:       tx %u rx %u",
	    na_get_nslots(&mna->mna_up, NR_TX),
	    na_get_nslots(&mna->mna_up, NR_RX));
#if CONFIG_NEXUS_USER_PIPE
	SK_DF(SK_VERB_MONITOR, "  next_pipe:   %u", mna->mna_up.na_next_pipe);
	SK_DF(SK_VERB_MONITOR, "  max_pipes:   %u", mna->mna_up.na_max_pipes);
#endif /* CONFIG_NEXUS_USER_PIPE */
	SK_DF(SK_VERB_MONITOR, "  mna_tx_rings: [%u,%u)", mna->mna_first[NR_TX],
	    mna->mna_last[NR_TX]);
	SK_DF(SK_VERB_MONITOR, "  mna_rx_rings: [%u,%u)", mna->mna_first[NR_RX],
	    mna->mna_last[NR_RX]);
	SK_DF(SK_VERB_MONITOR, "  mna_mode:    %u", mna->mna_mode);
#endif /* SK_LOG */

	return 0;

put_out:
	/* release the parent reference and the half-built monitor adapter */
	if (pna != NULL) {
		(void) na_release_locked(pna);
		pna = NULL;
	}
	NA_FREE(&mna->mna_up);
	return error;
}
1671
1672 static void
nx_mon_quantum_copy_64x(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t unused_arg1,const uint16_t unused_arg2,const uint16_t unused_arg3,const boolean_t unused_arg4)1673 nx_mon_quantum_copy_64x(const enum txrx t, kern_packet_t dph,
1674 const uint16_t doff, kern_packet_t sph, const uint16_t soff,
1675 const uint32_t len, const boolean_t unused_arg1,
1676 const uint16_t unused_arg2, const uint16_t unused_arg3,
1677 const boolean_t unused_arg4)
1678 {
1679 /* for function prototype parity with pkt_copy_from_pkt_t */
1680 #pragma unused(unused_arg1, unused_arg2, unused_arg3, unused_arg4)
1681 #pragma unused(t, doff, soff)
1682 struct __kern_quantum *dqum = SK_PTR_ADDR_KQUM(dph);
1683 struct __kern_quantum *squm = SK_PTR_ADDR_KQUM(sph);
1684 uint8_t *sbuf, *dbuf;
1685
1686 ASSERT(METADATA_TYPE(squm) == NEXUS_META_TYPE_QUANTUM);
1687 ASSERT(METADATA_TYPE(squm) == METADATA_TYPE(dqum));
1688 VERIFY(IS_P2ALIGNED(len, 64));
1689
1690 MD_BUFLET_ADDR(squm, sbuf);
1691 MD_BUFLET_ADDR(dqum, dbuf);
1692 VERIFY(IS_P2ALIGNED(dbuf, sizeof(uint64_t)));
1693
1694 if (__probable(IS_P2ALIGNED(sbuf, sizeof(uint64_t)))) {
1695 sk_copy64_64x((uint64_t *)(void *)sbuf,
1696 (uint64_t *)(void *)dbuf, len);
1697 } else {
1698 bcopy(sbuf, dbuf, len);
1699 }
1700 /*
1701 * This copy routine only copies to/from a buflet, so the length
1702 * is guaranteed be <= the size of a buflet.
1703 */
1704 VERIFY(len <= UINT16_MAX);
1705 METADATA_SET_LEN(dqum, (uint16_t)len, 0);
1706 }
1707