1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 */
53
54 /*
55 * $FreeBSD$
56 *
57 * Monitors
58 *
59 * netmap monitors can be used to do monitoring of network traffic
60 * on another adapter, when the latter adapter is working in netmap mode.
61 *
62 * Monitors offer to userspace the same interface as any other netmap port,
63 * with as many pairs of netmap rings as the monitored adapter.
64 * However, only the rx rings are actually used. Each monitor rx ring receives
65 * the traffic transiting on both the tx and rx corresponding rings in the
66 * monitored adapter. During registration, the user can choose if she wants
67 * to intercept tx only, rx only, or both tx and rx traffic.
68 *
69 * If the monitor is not able to cope with the stream of frames, excess traffic
70 * will be dropped.
71 *
72 * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
73 *
74 * Monitors can be either zero-copy or copy-based.
75 *
76 * Copy monitors see the frames before they are consumed:
77 *
78 * - For tx traffic, this is when the application sends them, before they are
79 * passed down to the adapter.
80 *
81 * - For rx traffic, this is when they are received by the adapter, before
82 * they are sent up to the application, if any (note that, if no
83 * application is reading from a monitored ring, the ring will eventually
84 * fill up and traffic will stop).
85 *
86 * Zero-copy monitors only see the frames after they have been consumed:
87 *
 * - For tx traffic, this is after the slots containing the frames have been
 *   marked as free. Note that this may happen at a considerable delay after
 *   frame transmission, since freeing of slots is often done lazily.
91 *
92 * - For rx traffic, this is after the consumer on the monitored adapter
93 * has released them. In most cases, the consumer is a userspace
94 * application which may have modified the frame contents.
95 *
96 * Several copy monitors may be active on any ring. Zero-copy monitors,
97 * instead, need exclusive access to each of the monitored rings. This may
98 * change in the future, if we implement zero-copy monitor chaining.
99 *
100 */
101
102 #include <skywalk/os_skywalk_private.h>
103 #include <skywalk/nexus/monitor/nx_monitor.h>
104
105 static int nx_mon_na_txsync(struct __kern_channel_ring *, struct proc *,
106 uint32_t);
107 static int nx_mon_na_rxsync(struct __kern_channel_ring *, struct proc *,
108 uint32_t);
109 static int nx_mon_na_krings_create(struct nexus_adapter *,
110 struct kern_channel *);
111 static void nx_mon_na_krings_delete(struct nexus_adapter *,
112 struct kern_channel *, boolean_t);
113 static uint32_t nx_mon_txrx2chmode(enum txrx);
114 static int nx_mon_kr_alloc(struct __kern_channel_ring *, uint32_t);
115 static void nx_mon_kr_dealloc(struct __kern_channel_ring *);
116 static int nx_mon_na_krings_locks(struct nexus_adapter *,
117 uint32_t[NR_TXRX], uint32_t[NR_TXRX]);
118 static void nx_mon_na_krings_unlock(struct nexus_adapter *,
119 const uint32_t[NR_TXRX], const uint32_t[NR_TXRX]);
120 static int nx_mon_enable(struct nexus_adapter *, int);
121 static void nx_mon_disable(struct nexus_adapter *);
122 static int nx_mon_add(struct __kern_channel_ring *,
123 struct __kern_channel_ring *, boolean_t);
124 static void nx_mon_del(struct __kern_channel_ring *,
125 struct __kern_channel_ring *, boolean_t);
126 static int nx_mon_na_activate_common(struct nexus_adapter *,
127 na_activate_mode_t, boolean_t);
128 static pkt_copy_from_pkt_t nx_mon_quantum_copy_64x;
129
130 static int nx_mon_zcopy_parent_sync(struct __kern_channel_ring *,
131 struct proc *, uint32_t, enum txrx);
132 static int nx_mon_zcopy_na_activate(struct nexus_adapter *, na_activate_mode_t);
133 static void nx_mon_zcopy_na_dtor(struct nexus_adapter *);
134
135 static void nx_mon_parent_sync(struct __kern_channel_ring *, struct proc *,
136 slot_idx_t, int);
137 static int nx_mon_na_activate(struct nexus_adapter *, na_activate_mode_t);
138 static void nx_mon_na_dtor(struct nexus_adapter *);
139
140 /*
141 * monitors work by replacing the nm_sync() and possibly the
142 * nm_notify() callbacks in the monitored rings.
143 */
144 static int nx_mon_zcopy_parent_txsync(struct __kern_channel_ring *,
145 struct proc *, uint32_t);
146 static int nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring *,
147 struct proc *, uint32_t);
148 static int nx_mon_parent_txsync(struct __kern_channel_ring *,
149 struct proc *, uint32_t);
150 static int nx_mon_parent_rxsync(struct __kern_channel_ring *,
151 struct proc *, uint32_t);
152 static int nx_mon_parent_notify(struct __kern_channel_ring *,
153 struct proc *, uint32_t);
154
155 static void nx_mon_dom_init(struct nxdom *);
156 static void nx_mon_dom_terminate(struct nxdom *);
157 static void nx_mon_dom_fini(struct nxdom *);
158 static int nx_mon_dom_bind_port(struct kern_nexus *, nexus_port_t *,
159 struct nxbind *, void *);
160 static int nx_mon_dom_unbind_port(struct kern_nexus *, nexus_port_t);
161 static int nx_mon_dom_connect(struct kern_nexus_domain_provider *,
162 struct kern_nexus *, struct kern_channel *, struct chreq *,
163 struct kern_channel *, struct nxbind *, struct proc *);
164 static void nx_mon_dom_disconnect(struct kern_nexus_domain_provider *,
165 struct kern_nexus *, struct kern_channel *);
166 static void nx_mon_dom_defunct(struct kern_nexus_domain_provider *,
167 struct kern_nexus *, struct kern_channel *, struct proc *);
168 static void nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider *,
169 struct kern_nexus *, struct kern_channel *, boolean_t);
170
171 static int nx_mon_prov_init(struct kern_nexus_domain_provider *);
172 static int nx_mon_prov_params_adjust(const struct kern_nexus_domain_provider *,
173 const struct nxprov_params *, struct nxprov_adjusted_params *);
174 static int nx_mon_prov_params(struct kern_nexus_domain_provider *,
175 const uint32_t, const struct nxprov_params *, struct nxprov_params *,
176 struct skmem_region_params[SKMEM_REGIONS], uint32_t);
177 static int nx_mon_prov_mem_new(struct kern_nexus_domain_provider *,
178 struct kern_nexus *, struct nexus_adapter *);
179 static void nx_mon_prov_fini(struct kern_nexus_domain_provider *);
180
181 static struct nexus_monitor_adapter *na_mon_alloc(zalloc_flags_t);
182 static void na_mon_free(struct nexus_adapter *);
183
184 struct nxdom nx_monitor_dom_s = {
185 .nxdom_prov_head =
186 STAILQ_HEAD_INITIALIZER(nx_monitor_dom_s.nxdom_prov_head),
187 .nxdom_type = NEXUS_TYPE_MONITOR,
188 .nxdom_md_type = NEXUS_META_TYPE_QUANTUM,
189 .nxdom_md_subtype = NEXUS_META_SUBTYPE_PAYLOAD,
190 .nxdom_name = "monitor",
191 /*
192 * The following values don't really matter much, as a monitor
193 * isn't usable on its own; we just define them as non-zeroes.
194 */
195 .nxdom_ports = {
196 .nb_def = 1,
197 .nb_min = 1,
198 .nb_max = 1,
199 },
200 .nxdom_tx_rings = {
201 .nb_def = 1,
202 .nb_min = 1,
203 .nb_max = 1,
204 },
205 .nxdom_rx_rings = {
206 .nb_def = 1,
207 .nb_min = 1,
208 .nb_max = 1,
209 },
210 .nxdom_tx_slots = {
211 .nb_def = 1,
212 .nb_min = 1,
213 .nb_max = 1,
214 },
215 .nxdom_rx_slots = {
216 .nb_def = 1,
217 .nb_min = 1,
218 .nb_max = 1,
219 },
220 .nxdom_buf_size = {
221 .nb_def = 64,
222 .nb_min = 64,
223 .nb_max = 64,
224 },
225 .nxdom_large_buf_size = {
226 .nb_def = 0,
227 .nb_min = 0,
228 .nb_max = 0,
229 },
230 .nxdom_meta_size = {
231 .nb_def = NX_METADATA_OBJ_MIN_SZ,
232 .nb_min = NX_METADATA_OBJ_MIN_SZ,
233 .nb_max = NX_METADATA_USR_MAX_SZ,
234 },
235 .nxdom_stats_size = {
236 .nb_def = 0,
237 .nb_min = 0,
238 .nb_max = NX_STATS_MAX_SZ,
239 },
240 .nxdom_pipes = {
241 .nb_def = 0,
242 .nb_min = 0,
243 .nb_max = 0,
244 },
245 .nxdom_flowadv_max = {
246 .nb_def = 0,
247 .nb_min = 0,
248 .nb_max = NX_FLOWADV_MAX,
249 },
250 .nxdom_nexusadv_size = {
251 .nb_def = 0,
252 .nb_min = 0,
253 .nb_max = NX_NEXUSADV_MAX_SZ,
254 },
255 .nxdom_capabilities = {
256 .nb_def = NXPCAP_USER_CHANNEL,
257 .nb_min = NXPCAP_USER_CHANNEL,
258 .nb_max = NXPCAP_USER_CHANNEL,
259 },
260 .nxdom_qmap = {
261 .nb_def = NEXUS_QMAP_TYPE_INVALID,
262 .nb_min = NEXUS_QMAP_TYPE_INVALID,
263 .nb_max = NEXUS_QMAP_TYPE_INVALID,
264 },
265 .nxdom_max_frags = {
266 .nb_def = NX_PBUF_FRAGS_DEFAULT,
267 .nb_min = NX_PBUF_FRAGS_MIN,
268 .nb_max = NX_PBUF_FRAGS_DEFAULT,
269 },
270 .nxdom_init = nx_mon_dom_init,
271 .nxdom_terminate = nx_mon_dom_terminate,
272 .nxdom_fini = nx_mon_dom_fini,
273 .nxdom_find_port = NULL,
274 .nxdom_port_is_reserved = NULL,
275 .nxdom_bind_port = nx_mon_dom_bind_port,
276 .nxdom_unbind_port = nx_mon_dom_unbind_port,
277 .nxdom_connect = nx_mon_dom_connect,
278 .nxdom_disconnect = nx_mon_dom_disconnect,
279 .nxdom_defunct = nx_mon_dom_defunct,
280 .nxdom_defunct_finalize = nx_mon_dom_defunct_finalize,
281 };
282
283 static struct kern_nexus_domain_provider nx_monitor_prov_s = {
284 .nxdom_prov_name = NEXUS_PROVIDER_MONITOR,
285 .nxdom_prov_flags = NXDOMPROVF_DEFAULT,
286 .nxdom_prov_cb = {
287 .dp_cb_init = nx_mon_prov_init,
288 .dp_cb_fini = nx_mon_prov_fini,
289 .dp_cb_params = nx_mon_prov_params,
290 .dp_cb_mem_new = nx_mon_prov_mem_new,
291 .dp_cb_config = NULL,
292 .dp_cb_nx_ctor = NULL,
293 .dp_cb_nx_dtor = NULL,
294 .dp_cb_nx_mem_info = NULL, /* not supported */
295 .dp_cb_nx_mib_get = NULL,
296 },
297 };
298
/*
 * Allocation type for struct nexus_monitor_adapter; used with
 * zalloc_flags()/zfree() by na_mon_alloc() and na_mon_free().
 */
static SKMEM_TYPE_DEFINE(na_mon_zone, struct nexus_monitor_adapter);

/*
 * Allocation tag handed to sk_realloc_type_array() when the monitors
 * array of a monitored kring is grown (see nx_mon_kr_alloc()).
 */
#define SKMEM_TAG_MONITORS "com.apple.skywalk.monitors"
static SKMEM_TAG_DEFINE(skmem_tag_monitors, SKMEM_TAG_MONITORS);
303
304 static void
nx_mon_dom_init(struct nxdom * nxdom)305 nx_mon_dom_init(struct nxdom *nxdom)
306 {
307 SK_LOCK_ASSERT_HELD();
308 ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED));
309
310 (void) nxdom_prov_add(nxdom, &nx_monitor_prov_s);
311 }
312
313 static void
nx_mon_dom_terminate(struct nxdom * nxdom)314 nx_mon_dom_terminate(struct nxdom *nxdom)
315 {
316 struct kern_nexus_domain_provider *nxdom_prov, *tnxdp;
317
318 STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head,
319 nxdom_prov_link, tnxdp) {
320 (void) nxdom_prov_del(nxdom_prov);
321 }
322 }
323
/*
 * Domain fini callback: nothing to release for the monitor domain.
 */
static void
nx_mon_dom_fini(struct nxdom *nxdom)
{
#pragma unused(nxdom)
	/* intentionally empty */
}
329
330 __attribute__((noreturn))
331 static int
nx_mon_dom_bind_port(struct kern_nexus * nx,nexus_port_t * nx_port,struct nxbind * nxb,void * info)332 nx_mon_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port,
333 struct nxbind *nxb, void *info)
334 {
335 #pragma unused(nx, nx_port, nxb, info)
336 VERIFY(0);
337 /* NOTREACHED */
338 __builtin_unreachable();
339 }
340
341 __attribute__((noreturn))
342 static int
nx_mon_dom_unbind_port(struct kern_nexus * nx,nexus_port_t nx_port)343 nx_mon_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port)
344 {
345 #pragma unused(nx, nx_port)
346 VERIFY(0);
347 /* NOTREACHED */
348 __builtin_unreachable();
349 }
350
/*
 * Domain connect callback.  It is not expected to be invoked for the
 * monitor domain: it trips VERIFY(0) and never returns.
 */
__attribute__((noreturn))
static int
nx_mon_dom_connect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
{
#pragma unused(nxdom_prov, nx, ch, chr, ch0, nxb, p)
	/* must never be called */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
362
/*
 * Domain disconnect callback.  It is not expected to be invoked for
 * the monitor domain: it trips VERIFY(0) and never returns.
 */
__attribute__((noreturn))
static void
nx_mon_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nxdom_prov, nx, ch)
	/* must never be called */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
373
/*
 * Domain defunct callback: a no-op for the monitor domain.
 */
static void
nx_mon_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct proc *p)
{
#pragma unused(nxdom_prov, nx, ch, p)
	/* intentionally empty */
}
380
381 static void
nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider * nxdom_prov,struct kern_nexus * nx,struct kern_channel * ch,boolean_t locked)382 nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov,
383 struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked)
384 {
385 #pragma unused(nxdom_prov, nx, ch, locked)
386 }
387
388 static int
nx_mon_prov_init(struct kern_nexus_domain_provider * nxdom_prov)389 nx_mon_prov_init(struct kern_nexus_domain_provider *nxdom_prov)
390 {
391 #pragma unused(nxdom_prov)
392 SK_D("initializing %s", nxdom_prov->nxdom_prov_name);
393 return 0;
394 }
395
/*
 * Adjustment hook handed to nxprov_params_adjust() by
 * nx_mon_prov_params(); monitors make no adjustments, so it always
 * returns 0 without touching its arguments.
 */
static int
nx_mon_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov,
    const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj)
{
#pragma unused(nxdom_prov, nxp, adj)
	/* nothing to adjust */
	return 0;
}
404
405 static int
nx_mon_prov_params(struct kern_nexus_domain_provider * nxdom_prov,const uint32_t req,const struct nxprov_params * nxp0,struct nxprov_params * nxp,struct skmem_region_params srp[SKMEM_REGIONS],uint32_t pp_region_config_flags)406 nx_mon_prov_params(struct kern_nexus_domain_provider *nxdom_prov,
407 const uint32_t req, const struct nxprov_params *nxp0,
408 struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS],
409 uint32_t pp_region_config_flags)
410 {
411 struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom;
412
413 return nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp,
414 nxdom, nxdom, nxdom, pp_region_config_flags,
415 nx_mon_prov_params_adjust);
416 }
417
418 static int
nx_mon_prov_mem_new(struct kern_nexus_domain_provider * nxdom_prov,struct kern_nexus * nx,struct nexus_adapter * na)419 nx_mon_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov,
420 struct kern_nexus *nx, struct nexus_adapter *na)
421 {
422 #pragma unused(nxdom_prov)
423 int err = 0;
424
425 SK_DF(SK_VERB_MONITOR,
426 "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx),
427 NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name,
428 SK_KVA(na));
429
430 ASSERT(na->na_arena == NULL);
431 ASSERT(NX_USER_CHANNEL_PROV(nx));
432 /*
433 * The underlying nexus adapter uses the same memory allocator
434 * as the monitored adapter; don't store the pp in the nexus.
435 *
436 * This means that clients calling kern_nexus_get_pbufpool()
437 * will get NULL, but this is fine since we don't expose the
438 * monitor to external kernel clients.
439 */
440 na->na_arena = skmem_arena_create_for_nexus(na,
441 NX_PROV(nx)->nxprov_region_params, NULL, NULL, 0, NULL, &err);
442 ASSERT(na->na_arena != NULL || err != 0);
443
444 return err;
445 }
446
447 static void
nx_mon_prov_fini(struct kern_nexus_domain_provider * nxdom_prov)448 nx_mon_prov_fini(struct kern_nexus_domain_provider *nxdom_prov)
449 {
450 #pragma unused(nxdom_prov)
451 SK_D("destroying %s", nxdom_prov->nxdom_prov_name);
452 }
453
454 static struct nexus_monitor_adapter *
na_mon_alloc(zalloc_flags_t how)455 na_mon_alloc(zalloc_flags_t how)
456 {
457 struct nexus_monitor_adapter *mna;
458
459 _CASSERT(offsetof(struct nexus_monitor_adapter, mna_up) == 0);
460
461 mna = zalloc_flags(na_mon_zone, how | Z_ZERO);
462 if (mna) {
463 mna->mna_up.na_type = NA_MONITOR;
464 mna->mna_up.na_free = na_mon_free;
465 }
466 return mna;
467 }
468
469 static void
na_mon_free(struct nexus_adapter * na)470 na_mon_free(struct nexus_adapter *na)
471 {
472 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
473
474 ASSERT(mna->mna_up.na_refcount == 0);
475 SK_DF(SK_VERB_MEM, "mna 0x%llx FREE", SK_KVA(mna));
476 bzero(mna, sizeof(*mna));
477 zfree(na_mon_zone, mna);
478 }
479
/*
 * Functions common to both kinds of monitors.
 */
483
484 /*
485 * nm_sync callback for the monitor's own tx rings.
486 * This makes no sense and always returns error
487 */
488 static int
nx_mon_na_txsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)489 nx_mon_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
490 uint32_t flags)
491 {
492 #pragma unused(kring, p, flags)
493 SK_DF(SK_VERB_MONITOR | SK_VERB_SYNC | SK_VERB_TX,
494 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
495 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
496 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
497 flags);
498 return EIO;
499 }
500
501 /*
502 * nm_sync callback for the monitor's own rx rings.
503 * Note that the lock in nx_mon_zcopy_parent_sync only protects
504 * writers among themselves. Synchronization between writers
505 * (i.e., nx_mon_zcopy_parent_txsync and nx_mon_zcopy_parent_rxsync)
506 * and readers (i.e., nx_mon_zcopy_parent_rxsync) relies on memory barriers.
507 */
508 static int
nx_mon_na_rxsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)509 nx_mon_na_rxsync(struct __kern_channel_ring *kring, struct proc *p,
510 uint32_t flags)
511 {
512 #pragma unused(p, flags)
513 SK_DF(SK_VERB_MONITOR | SK_VERB_SYNC | SK_VERB_RX,
514 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
515 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
516 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
517 flags);
518 kring->ckr_khead = kring->ckr_rhead;
519 membar_sync();
520 return 0;
521 }
522
523 /*
524 * na_krings_create callbacks for monitors.
525 * We could use the default netmap_hw_krings_zmon, but
526 * we don't need the nx_mbq.
527 */
528 static int
nx_mon_na_krings_create(struct nexus_adapter * na,struct kern_channel * ch)529 nx_mon_na_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
530 {
531 ASSERT(na->na_type == NA_MONITOR);
532 return na_rings_mem_setup(na, FALSE, ch);
533 }
534
535 /* na_krings_delete callback for monitors */
536 static void
nx_mon_na_krings_delete(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct)537 nx_mon_na_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
538 boolean_t defunct)
539 {
540 ASSERT(na->na_type == NA_MONITOR);
541 na_rings_mem_teardown(na, ch, defunct);
542 }
543
544 __attribute__((always_inline))
545 static inline uint32_t
nx_mon_txrx2chmode(enum txrx t)546 nx_mon_txrx2chmode(enum txrx t)
547 {
548 return t == NR_RX ? CHMODE_MONITOR_RX : CHMODE_MONITOR_TX;
549 }
550
551 /* allocate the monitors array in the monitored kring */
552 static int
nx_mon_kr_alloc(struct __kern_channel_ring * kring,uint32_t n)553 nx_mon_kr_alloc(struct __kern_channel_ring *kring, uint32_t n)
554 {
555 struct __kern_channel_ring **nm;
556
557 if (n <= kring->ckr_max_monitors) {
558 /* we already have more entries that requested */
559 return 0;
560 }
561
562 nm = sk_realloc_type_array(struct __kern_channel_ring *,
563 kring->ckr_max_monitors, n, kring->ckr_monitors,
564 Z_WAITOK, skmem_tag_monitors);
565 if (nm == NULL) {
566 return ENOMEM;
567 }
568
569 kring->ckr_monitors = nm;
570 kring->ckr_max_monitors = n;
571
572 return 0;
573 }
574
575 /* deallocate the parent array in the parent adapter */
576 static void
nx_mon_kr_dealloc(struct __kern_channel_ring * kring)577 nx_mon_kr_dealloc(struct __kern_channel_ring *kring)
578 {
579 if (kring->ckr_monitors != NULL) {
580 if (kring->ckr_n_monitors > 0) {
581 SK_ERR("freeing not empty monitor array for \"%s\" "
582 "(%u dangling monitors)!", kring->ckr_name,
583 kring->ckr_n_monitors);
584 }
585 sk_free_type_array(struct __kern_channel_ring *,
586 kring->ckr_max_monitors, kring->ckr_monitors);
587 kring->ckr_monitors = NULL;
588 kring->ckr_max_monitors = 0;
589 kring->ckr_n_monitors = 0;
590 }
591 }
592
593 static int
nx_mon_na_krings_locks(struct nexus_adapter * na,uint32_t qfirst[NR_TXRX],uint32_t qlast[NR_TXRX])594 nx_mon_na_krings_locks(struct nexus_adapter *na,
595 uint32_t qfirst[NR_TXRX], uint32_t qlast[NR_TXRX])
596 {
597 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
598 struct nexus_adapter *pna = mna->mna_pna;
599 enum txrx t;
600 int err = 0;
601
602 for_rx_tx(t) {
603 uint32_t i;
604
605 if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
606 continue;
607 }
608
609 qfirst[t] = qlast[t] = mna->mna_first[t];
610
611 /* synchronize with concurrently running nm_sync()s */
612 for (i = mna->mna_first[t]; i < mna->mna_last[t]; i++) {
613 struct __kern_channel_ring *kring;
614
615 /* the parent adapter's kring */
616 kring = &NAKR(pna, t)[i];
617 kr_stop(kring, KR_LOCKED);
618 qlast[t] = i + 1;
619 }
620 if (err != 0) {
621 break;
622 }
623 }
624
625 return err;
626 }
627
628 static void
nx_mon_na_krings_unlock(struct nexus_adapter * na,const uint32_t qfirst[NR_TXRX],const uint32_t qlast[NR_TXRX])629 nx_mon_na_krings_unlock(struct nexus_adapter *na,
630 const uint32_t qfirst[NR_TXRX], const uint32_t qlast[NR_TXRX])
631 {
632 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
633 struct nexus_adapter *pna = mna->mna_pna;
634 enum txrx t;
635
636 for_rx_tx(t) {
637 uint32_t i;
638
639 if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
640 continue;
641 }
642
643 /* synchronize with concurrently running nm_sync()s */
644 for (i = qfirst[t]; i < qlast[t]; i++) {
645 struct __kern_channel_ring *kring;
646
647 /* the parent adapter's kring */
648 kring = &NAKR(pna, t)[i];
649 kr_start(kring);
650 }
651 }
652 }
653
654 static int
nx_mon_enable(struct nexus_adapter * na,boolean_t zcopy)655 nx_mon_enable(struct nexus_adapter *na, boolean_t zcopy)
656 {
657 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
658 struct nexus_adapter *pna = mna->mna_pna;
659 struct skmem_arena_nexus *na_arena = skmem_arena_nexus(pna->na_arena);
660 uint32_t qfirst[NR_TXRX], qlast[NR_TXRX];
661 enum txrx t;
662 int err = 0;
663 uint32_t i;
664
665 ASSERT(!(na->na_flags & NAF_ACTIVE));
666
667 bzero(&qfirst, sizeof(qfirst));
668 bzero(&qlast, sizeof(qlast));
669
670 /*
671 * Acquire the target kring(s). q{first,last}0 represent the
672 * target ring set. q{first,last} represent the ones that have
673 * been successfully acquired. In the event the acquisition
674 * fails, we must release any previously-acquired rings.
675 */
676 if ((err = nx_mon_na_krings_locks(na, qfirst, qlast)) != 0) {
677 goto unlock;
678 }
679
680 ASSERT(na_arena->arn_rx_pp == na_arena->arn_tx_pp);
681 if (na_arena->arn_rx_pp->pp_max_frags > 1) {
682 VERIFY(na_arena->arn_rx_pp->pp_md_type == NEXUS_META_TYPE_PACKET);
683 mna->mna_pkt_copy_from_pkt = pkt_copy_multi_buflet_from_pkt;
684 } else {
685 if (na_arena->arn_rx_pp->pp_md_type == NEXUS_META_TYPE_PACKET) {
686 mna->mna_pkt_copy_from_pkt = pkt_copy_from_pkt;
687 } else {
688 mna->mna_pkt_copy_from_pkt = nx_mon_quantum_copy_64x;
689 }
690 }
691
692 for_rx_tx(t) {
693 if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
694 continue;
695 }
696
697 for (i = qfirst[t]; i < qlast[t]; i++) {
698 struct __kern_channel_ring *kring, *mkring;
699
700 /* the parent adapter's kring */
701 kring = &NAKR(pna, t)[i];
702 mkring = &na->na_rx_rings[i];
703 err = nx_mon_add(mkring, kring, zcopy);
704 if (err != 0) {
705 break;
706 }
707 }
708 if (err != 0) {
709 break;
710 }
711 }
712
713 if (err == 0) {
714 atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
715 goto unlock;
716 }
717
718 for_rx_tx(t) {
719 if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
720 continue;
721 }
722
723 for (i = qfirst[t]; i < qlast[t]; i++) {
724 struct __kern_channel_ring *kring, *mkring;
725
726 /* the parent adapter's kring */
727 kring = &NAKR(pna, t)[i];
728 mkring = &na->na_rx_rings[i];
729 nx_mon_del(mkring, kring, FALSE);
730 }
731 }
732 ASSERT(!(na->na_flags & NAF_ACTIVE));
733
734 unlock:
735 nx_mon_na_krings_unlock(na, qfirst, qlast);
736
737 SK_DF(err ? SK_VERB_ERROR : SK_VERB_MONITOR,
738 "%s (0x%llx): mode 0x%x txrings[%u,%u], rxrings[%u,%u] err %d",
739 na->na_name, SK_KVA(na), mna->mna_mode, qfirst[NR_TX], qlast[NR_TX],
740 qfirst[NR_RX], qlast[NR_RX], err);
741
742 return err;
743 }
744
745 static void
nx_mon_disable(struct nexus_adapter * na)746 nx_mon_disable(struct nexus_adapter *na)
747 {
748 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
749 struct nexus_adapter *pna = mna->mna_pna;
750 uint32_t qfirst[NR_TXRX], qlast[NR_TXRX];
751 enum txrx t;
752 int err;
753 uint32_t i;
754
755 ASSERT(na->na_flags & NAF_ACTIVE);
756
757 bzero(&qfirst, sizeof(qfirst));
758 bzero(&qlast, sizeof(qlast));
759
760 /* blocking kring(s) acquisition; must not fail */
761 err = nx_mon_na_krings_locks(na, qfirst, qlast);
762 ASSERT(err == 0);
763 mna->mna_pkt_copy_from_pkt = NULL;
764 for_rx_tx(t) {
765 if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
766 continue;
767 }
768
769 for (i = qfirst[t]; i < qlast[t]; i++) {
770 struct __kern_channel_ring *kring, *mkring;
771
772 kring = &NAKR(pna, t)[i];
773 mkring = &na->na_rx_rings[i];
774 nx_mon_del(mkring, kring, FALSE);
775 }
776 }
777 atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
778
779 nx_mon_na_krings_unlock(na, qfirst, qlast);
780 }
781
782 /*
783 * Add the monitor mkring to the list of monitors of kring.
784 * If this is the first monitor, intercept the callbacks
785 */
786 static int
nx_mon_add(struct __kern_channel_ring * mkring,struct __kern_channel_ring * kring,boolean_t zcopy)787 nx_mon_add(struct __kern_channel_ring *mkring,
788 struct __kern_channel_ring *kring, boolean_t zcopy)
789 {
790 int error;
791
792 /* make sure the monitor array exists and is big enough */
793 error = nx_mon_kr_alloc(kring, kring->ckr_n_monitors + 1);
794 if (error != 0) {
795 return error;
796 }
797
798 kring->ckr_monitors[kring->ckr_n_monitors] = mkring;
799 mkring->ckr_mon_pos = kring->ckr_n_monitors;
800 kring->ckr_n_monitors++;
801 if (kring->ckr_n_monitors == 1) {
802 /* this is the first monitor, intercept callbacks */
803 SK_DF(SK_VERB_MONITOR,
804 "mkr \"%s\" (0x%llx) krflags 0x%b intercept callbacks "
805 "on kr \"%s\" (0x%llx) krflags 0x%b", mkring->ckr_name,
806 SK_KVA(mkring), mkring->ckr_flags, CKRF_BITS,
807 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
808 CKRF_BITS);
809 kring->ckr_mon_sync = kring->ckr_na_sync;
810 /*
811 * zcopy monitors do not override nm_notify(), but
812 * we save the original one regardless, so that
813 * nx_mon_del() does not need to know the
814 * monitor type
815 */
816 kring->ckr_mon_notify = kring->ckr_na_notify;
817 if (kring->ckr_tx == NR_TX) {
818 kring->ckr_na_sync =
819 (zcopy ? nx_mon_zcopy_parent_txsync :
820 nx_mon_parent_txsync);
821 } else {
822 kring->ckr_na_sync =
823 (zcopy ? nx_mon_zcopy_parent_rxsync :
824 nx_mon_parent_rxsync);
825 if (!zcopy) {
826 /* also intercept notify */
827 kring->ckr_na_notify = nx_mon_parent_notify;
828 kring->ckr_mon_tail = kring->ckr_ktail;
829 }
830 }
831 } else {
832 SK_DF(SK_VERB_MONITOR,
833 "mkr \"%s\" (0x%llx) krflags 0x%b already intercept "
834 "callbacks on kr \"%s\" (0x%llx) krflags 0x%b, "
835 "%u monitors", mkring->ckr_name, SK_KVA(mkring),
836 mkring->ckr_flags, CKRF_BITS, kring->ckr_name,
837 SK_KVA(kring), kring->ckr_flags, CKRF_BITS,
838 kring->ckr_n_monitors);
839 }
840 return 0;
841 }
842
843 /*
844 * Remove the monitor mkring from the list of monitors of kring.
845 * If this is the last monitor, restore the original callbacks
846 */
847 static void
nx_mon_del(struct __kern_channel_ring * mkring,struct __kern_channel_ring * kring,boolean_t all)848 nx_mon_del(struct __kern_channel_ring *mkring,
849 struct __kern_channel_ring *kring, boolean_t all)
850 {
851 ASSERT(kring->ckr_n_monitors != 0);
852 if (all) {
853 kring->ckr_n_monitors = 0;
854 } else {
855 kring->ckr_n_monitors--;
856 if (mkring->ckr_mon_pos != kring->ckr_n_monitors) {
857 kring->ckr_monitors[mkring->ckr_mon_pos] =
858 kring->ckr_monitors[kring->ckr_n_monitors];
859 kring->ckr_monitors[mkring->ckr_mon_pos]->ckr_mon_pos =
860 mkring->ckr_mon_pos;
861 }
862 kring->ckr_monitors[kring->ckr_n_monitors] = NULL;
863 }
864 if (kring->ckr_n_monitors == 0) {
865 /*
866 * This was the last monitor, restore callbacks
867 * and delete monitor array.
868 */
869 SK_DF(SK_VERB_MONITOR,
870 "restoring sync callback on kr \"%s\" (0x%llx) "
871 "krflags 0x%b", kring->ckr_name, SK_KVA(kring),
872 kring->ckr_flags, CKRF_BITS);
873 kring->ckr_na_sync = kring->ckr_mon_sync;
874 kring->ckr_mon_sync = NULL;
875 if (kring->ckr_tx == NR_RX) {
876 SK_DF(SK_VERB_MONITOR,
877 "restoring notify callback on kr \"%s\" (0x%llx) "
878 "krflags 0x%b", kring->ckr_name, SK_KVA(kring),
879 kring->ckr_flags, CKRF_BITS);
880 kring->ckr_na_notify = kring->ckr_mon_notify;
881 kring->ckr_mon_notify = NULL;
882 }
883 nx_mon_kr_dealloc(kring);
884 } else {
885 SK_DF(SK_VERB_MONITOR,
886 "NOT restoring callbacks on kr \"%s\" (0x%llx) "
887 "krflags 0x%b, %u monitors left", kring->ckr_name,
888 SK_KVA(kring), kring->ckr_flags, CKRF_BITS,
889 kring->ckr_n_monitors);
890 }
891 }
892
893 /*
894 * This is called when the monitored adapter leaves skywalk mode (see
895 * na_unbind_channel). We need to notify the monitors that the monitored
896 * rings are gone. We do this by setting their mna->mna_pna to NULL.
897 * Note that the rings must be stopped when this happens, so no monitor
898 * ring callback can be active.
899 */
900 void
nx_mon_stop(struct nexus_adapter * na)901 nx_mon_stop(struct nexus_adapter *na)
902 {
903 enum txrx t;
904
905 SK_LOCK_ASSERT_HELD();
906
907 /* skip if this adapter has no allocated rings */
908 if (na->na_tx_rings == NULL) {
909 return;
910 }
911
912 na_disable_all_rings(na);
913
914 for_rx_tx(t) {
915 uint32_t i;
916
917 for (i = 0; i < na_get_nrings(na, t); i++) {
918 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
919 uint32_t j;
920
921 for (j = 0; j < kring->ckr_n_monitors; j++) {
922 struct __kern_channel_ring *mkring =
923 kring->ckr_monitors[j];
924 struct nexus_monitor_adapter *mna =
925 (struct nexus_monitor_adapter *)
926 KRNA(mkring);
927
928 /* forget about this adapter */
929 if (mna->mna_pna != NULL) {
930 ASSERT(na == mna->mna_pna);
931 (void) na_release_locked(mna->mna_pna);
932 mna->mna_pna = NULL;
933 }
934 }
935
936 /*
937 * Remove all monitors and restore callbacks;
938 * this is important for nexus adapters that
939 * are linked to one another, e.g. pipe, since
940 * the callback changes on one adapter affects
941 * its peer during sync times.
942 */
943 if (kring->ckr_n_monitors > 0) {
944 nx_mon_del(NULL, kring, TRUE);
945 }
946
947 ASSERT(kring->ckr_monitors == NULL);
948 ASSERT(kring->ckr_max_monitors == 0);
949 ASSERT(kring->ckr_n_monitors == 0);
950 }
951 }
952
953 na_enable_all_rings(na);
954 }
955
956 /*
957 * Common functions for the na_activate() callbacks of both kind of
958 * monitors.
959 */
960 static int
nx_mon_na_activate_common(struct nexus_adapter * na,na_activate_mode_t mode,boolean_t zcopy)961 nx_mon_na_activate_common(struct nexus_adapter *na, na_activate_mode_t mode,
962 boolean_t zcopy)
963 {
964 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
965 struct nexus_adapter *pna = mna->mna_pna;
966 int err = 0;
967
968 ASSERT(na->na_type == NA_MONITOR);
969
970 SK_DF(SK_VERB_MONITOR, "na \"%s\" (0x%llx) %s zcopy %u", na->na_name,
971 SK_KVA(na), na_activate_mode2str(mode), zcopy);
972
973 switch (mode) {
974 case NA_ACTIVATE_MODE_ON:
975 if (pna == NULL) {
976 /* parent left skywalk mode, fatal */
977 SK_ERR("%s: internal error", na->na_name);
978 err = ENXIO;
979 } else {
980 err = nx_mon_enable(na, zcopy);
981 }
982 break;
983
984 case NA_ACTIVATE_MODE_DEFUNCT:
985 break;
986
987 case NA_ACTIVATE_MODE_OFF:
988 if (pna == NULL) {
989 SK_DF(SK_VERB_MONITOR, "%s: parent left skywalk mode, "
990 "nothing to restore", na->na_name);
991 } else {
992 nx_mon_disable(na);
993 }
994 break;
995
996 default:
997 VERIFY(0);
998 /* NOTREACHED */
999 __builtin_unreachable();
1000 }
1001
1002 return err;
1003 }
1004
1005 /*
1006 * Functions specific for zero-copy monitors.
1007 */
1008
1009 /*
1010 * Common function for both zero-copy tx and rx nm_sync()
1011 * callbacks
1012 */
1013 static int
nx_mon_zcopy_parent_sync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags,enum txrx tx)1014 nx_mon_zcopy_parent_sync(struct __kern_channel_ring *kring, struct proc *p,
1015 uint32_t flags, enum txrx tx)
1016 {
1017 struct __kern_channel_ring *mkring = kring->ckr_monitors[0];
1018 int rel_slots, free_slots, busy, sent = 0;
1019 slot_idx_t beg, end, i;
1020 const slot_idx_t lim = kring->ckr_lim;
1021 const slot_idx_t mlim;
1022 int error = 0;
1023
1024 if (mkring == NULL) {
1025 SK_RD(5, "NULL monitor on kr \"%s\" (0x%llx) krflags 0x%b",
1026 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1027 CKRF_BITS);
1028 return 0;
1029 }
1030
1031 ASSERT(!KR_KERNEL_ONLY(kring));
1032 ASSERT(!KR_KERNEL_ONLY(mkring));
1033
1034 /* deconst */
1035 *(slot_idx_t *)(uintptr_t)&mlim = mkring->ckr_lim;
1036
1037 /* get the relased slots (rel_slots) */
1038 if (tx == NR_TX) {
1039 beg = kring->ckr_ktail;
1040 error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1041 if (error) {
1042 return error;
1043 }
1044 end = kring->ckr_ktail;
1045 } else { /* NR_RX */
1046 beg = kring->ckr_khead;
1047 end = kring->ckr_rhead;
1048 }
1049
1050 rel_slots = end - beg;
1051 if (rel_slots < 0) {
1052 rel_slots += kring->ckr_num_slots;
1053 }
1054
1055 if (!rel_slots) {
1056 /*
1057 * No released slots, but we still need
1058 * to call rxsync if this is a rx ring
1059 */
1060 goto out_rxsync;
1061 }
1062
1063 /*
1064 * We need to lock the monitor receive ring, since it
1065 * is the target of bot tx and rx traffic from the monitored
1066 * adapter
1067 */
1068 KR_LOCK(mkring);
1069 /* get the free slots available on the monitor ring */
1070 i = mkring->ckr_ktail;
1071 busy = i - mkring->ckr_khead;
1072 if (busy < 0) {
1073 busy += mkring->ckr_num_slots;
1074 }
1075 free_slots = mlim - busy;
1076
1077 if (!free_slots) {
1078 goto out;
1079 }
1080
1081 /* swap min(free_slots, rel_slots) slots */
1082 if (free_slots < rel_slots) {
1083 beg += (rel_slots - free_slots);
1084 if (beg >= kring->ckr_num_slots) {
1085 beg -= kring->ckr_num_slots;
1086 }
1087 rel_slots = free_slots;
1088 }
1089
1090 sent = rel_slots;
1091 for (; rel_slots; rel_slots--) {
1092 /*
1093 * Swap the slots.
1094 *
1095 * XXX: [email protected] -- this bypasses the slot attach/detach
1096 * interface, and needs to be changed when monitor adopts the
1097 * packet APIs. SD_SWAP() will perform a block copy of the
1098 * swap, and will readjust the kernel slot descriptor's sd_user
1099 * accordingly.
1100 */
1101 SD_SWAP(KR_KSD(mkring, i), KR_USD(mkring, i),
1102 KR_KSD(kring, beg), KR_USD(kring, beg));
1103
1104 SK_RD(5, "beg %u buf_idx %u", beg,
1105 METADATA_IDX(KR_KSD(kring, beg)->sd_qum));
1106
1107 beg = SLOT_NEXT(beg, lim);
1108 i = SLOT_NEXT(i, mlim);
1109 }
1110 membar_sync();
1111 mkring->ckr_ktail = i;
1112
1113 out:
1114 KR_UNLOCK(mkring);
1115
1116 if (sent) {
1117 /* notify the new frames to the monitor */
1118 (void) mkring->ckr_na_notify(mkring, p, 0);
1119 }
1120
1121 out_rxsync:
1122 if (tx == NR_RX) {
1123 error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1124 }
1125
1126 return error;
1127 }
1128
1129 /*
1130 * Callback used to replace the ckr_na_sync callback in the monitored tx rings.
1131 */
1132 static int
nx_mon_zcopy_parent_txsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1133 nx_mon_zcopy_parent_txsync(struct __kern_channel_ring *kring, struct proc *p,
1134 uint32_t flags)
1135 {
1136 SK_DF(SK_VERB_MONITOR,
1137 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x",
1138 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1139 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1140 return nx_mon_zcopy_parent_sync(kring, p, flags, NR_TX);
1141 }
1142
1143 /* callback used to replace the nm_sync callback in the monitored rx rings */
1144 static int
nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1145 nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring *kring, struct proc *p,
1146 uint32_t flags)
1147 {
1148 SK_DF(SK_VERB_MONITOR,
1149 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x",
1150 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1151 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1152 return nx_mon_zcopy_parent_sync(kring, p, flags, NR_RX);
1153 }
1154
1155 static int
nx_mon_zcopy_na_activate(struct nexus_adapter * na,na_activate_mode_t mode)1156 nx_mon_zcopy_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1157 {
1158 return nx_mon_na_activate_common(na, mode, TRUE /* zcopy */);
1159 }
1160
1161 /* na_dtor callback for monitors */
1162 static void
nx_mon_zcopy_na_dtor(struct nexus_adapter * na)1163 nx_mon_zcopy_na_dtor(struct nexus_adapter *na)
1164 {
1165 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
1166 struct nexus_adapter *pna = mna->mna_pna;
1167
1168 SK_LOCK_ASSERT_HELD();
1169 ASSERT(na->na_type == NA_MONITOR);
1170
1171 if (pna != NULL) {
1172 (void) na_release_locked(pna);
1173 mna->mna_pna = NULL;
1174 }
1175 }
1176
1177 /*
1178 * Functions specific for copy monitors.
1179 */
1180
1181 static void
nx_mon_parent_sync(struct __kern_channel_ring * kring,struct proc * p,slot_idx_t first_new,int new_slots)1182 nx_mon_parent_sync(struct __kern_channel_ring *kring, struct proc *p,
1183 slot_idx_t first_new, int new_slots)
1184 {
1185 nexus_meta_type_t md_type = KRNA(kring)->na_md_type;
1186 uint32_t j;
1187
1188 for (j = 0; j < kring->ckr_n_monitors; j++) {
1189 struct __kern_channel_ring *mkring = kring->ckr_monitors[j];
1190 slot_idx_t i, mlim, beg;
1191 int free_slots, busy, sent = 0, m;
1192 const slot_idx_t lim = kring->ckr_lim;
1193 struct nexus_adapter *dst_na = KRNA(mkring);
1194 struct nexus_monitor_adapter *mna =
1195 (struct nexus_monitor_adapter *)dst_na;
1196 uint32_t max_len = mkring->ckr_pp->pp_max_frags *
1197 PP_BUF_SIZE_DEF(mkring->ckr_pp);
1198
1199 /*
1200 * src and dst adapters must share the same nexus;
1201 * this test is done in nx_monitor_na_find(). This
1202 * covers both buffer and metadata sizes.
1203 */
1204
1205 mlim = mkring->ckr_lim;
1206
1207 /*
1208 * We need to lock the monitor receive ring, since it
1209 * is the target of both tx and rx traffics from the
1210 * monitored adapter.
1211 */
1212 KR_LOCK(mkring);
1213 /* get the free slots available on the monitor ring */
1214 i = mkring->ckr_ktail;
1215 busy = i - mkring->ckr_khead;
1216 if (busy < 0) {
1217 busy += mkring->ckr_num_slots;
1218 }
1219 free_slots = mlim - busy;
1220
1221 if (!free_slots) {
1222 goto out;
1223 }
1224
1225 /* copy min(free_slots, new_slots) slots */
1226 m = new_slots;
1227 beg = first_new;
1228 if (free_slots < m) {
1229 beg += (m - free_slots);
1230 if (beg >= kring->ckr_num_slots) {
1231 beg -= kring->ckr_num_slots;
1232 }
1233 m = free_slots;
1234 }
1235
1236 ASSERT(KRNA(mkring)->na_md_type == md_type);
1237
1238 for (; m; m--) {
1239 struct __kern_slot_desc *src_sd = KR_KSD(kring, beg);
1240 struct __kern_slot_desc *dst_sd = KR_KSD(mkring, i);
1241 struct __kern_packet *spkt, *dpkt;
1242 kern_packet_t sph, dph;
1243 uint32_t copy_len;
1244
1245 if (!KSD_VALID_METADATA(src_sd)) {
1246 goto skip;
1247 }
1248
1249 /* retreive packet handles from slot */
1250 spkt = src_sd->sd_pkt;
1251 sph = SK_PTR_ENCODE(spkt, METADATA_TYPE(spkt),
1252 METADATA_SUBTYPE(spkt));
1253 dpkt = dst_sd->sd_pkt;
1254 dph = SK_PTR_ENCODE(dpkt, METADATA_TYPE(dpkt),
1255 METADATA_SUBTYPE(dpkt));
1256
1257 ASSERT(METADATA_TYPE(spkt) == METADATA_TYPE(dpkt));
1258
1259 ASSERT(spkt->pkt_qum.qum_len <= (UINT32_MAX - 63));
1260 copy_len = spkt->pkt_qum.qum_len;
1261
1262 /* round to a multiple of 64 */
1263 copy_len = (copy_len + 63) & ~63;
1264
1265 if (__improbable(copy_len > max_len)) {
1266 SK_RD(5, "kr \"%s\" -> mkr \"%s\": "
1267 "truncating %u to %u",
1268 kring->ckr_name, mkring->ckr_name,
1269 (uint32_t)copy_len, max_len);
1270 copy_len = max_len;
1271 }
1272
1273 /* copy buffers */
1274 mna->mna_pkt_copy_from_pkt(kring->ckr_tx, dph, 0, sph,
1275 0, copy_len, FALSE, 0, 0, FALSE);
1276
1277 /* copy the associated meta data */
1278 _QUM_COPY(&(spkt)->pkt_qum, &(dpkt)->pkt_qum);
1279 if (md_type == NEXUS_META_TYPE_PACKET) {
1280 _PKT_COPY(spkt, dpkt);
1281 ASSERT(dpkt->pkt_mbuf == NULL);
1282 }
1283
1284 ASSERT(!(dpkt->pkt_qum.qum_qflags & QUM_F_KERNEL_ONLY) ||
1285 PP_KERNEL_ONLY(dpkt->pkt_qum.qum_pp));
1286
1287 sent++;
1288 i = SLOT_NEXT(i, mlim);
1289 skip:
1290 beg = SLOT_NEXT(beg, lim);
1291 }
1292 membar_sync();
1293 mkring->ckr_ktail = i;
1294 out:
1295 KR_UNLOCK(mkring);
1296
1297 if (sent) {
1298 /* notify the new frames to the monitor */
1299 (void) mkring->ckr_na_notify(mkring, p, 0);
1300 }
1301 }
1302 }
1303
1304 /* callback used to replace the nm_sync callback in the monitored tx rings */
1305 static int
nx_mon_parent_txsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1306 nx_mon_parent_txsync(struct __kern_channel_ring *kring, struct proc *p,
1307 uint32_t flags)
1308 {
1309 slot_idx_t first_new;
1310 int new_slots;
1311 nexus_type_t nx_type =
1312 kring->ckr_na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
1313
1314 /*
1315 * For user pipe nexus, txsync can also be initated from RX process
1316 * context, hence user pipe tx ring should be accessed holding
1317 * ckr_qlock.
1318 */
1319 if (nx_type == NEXUS_TYPE_USER_PIPE) {
1320 KR_LOCK(kring);
1321 }
1322
1323 /* get the new slots */
1324 first_new = kring->ckr_khead;
1325 new_slots = kring->ckr_rhead - first_new;
1326 if (new_slots < 0) {
1327 new_slots += kring->ckr_num_slots;
1328 }
1329 if (new_slots) {
1330 nx_mon_parent_sync(kring, p, first_new, new_slots);
1331 }
1332
1333 if (nx_type == NEXUS_TYPE_USER_PIPE) {
1334 KR_UNLOCK(kring);
1335 }
1336
1337 return kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1338 }
1339
1340 /* callback used to replace the nm_sync callback in the monitored rx rings */
1341 static int
nx_mon_parent_rxsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1342 nx_mon_parent_rxsync(struct __kern_channel_ring *kring, struct proc *p,
1343 uint32_t flags)
1344 {
1345 slot_idx_t first_new;
1346 int new_slots, error;
1347
1348 /* get the new slots */
1349 error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1350 if (error) {
1351 return error;
1352 }
1353 first_new = kring->ckr_mon_tail;
1354 new_slots = kring->ckr_ktail - first_new;
1355 if (new_slots < 0) {
1356 new_slots += kring->ckr_num_slots;
1357 }
1358 if (new_slots) {
1359 nx_mon_parent_sync(kring, p, first_new, new_slots);
1360 }
1361 kring->ckr_mon_tail = kring->ckr_ktail;
1362 return 0;
1363 }
1364
1365 /*
1366 * Callback used to replace the nm_notify() callback in the monitored rx rings
1367 */
1368 static int
nx_mon_parent_notify(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1369 nx_mon_parent_notify(struct __kern_channel_ring *kring, struct proc *p,
1370 uint32_t flags)
1371 {
1372 int err = 0;
1373 sk_protect_t protect = NULL;
1374
1375 SK_DF(SK_VERB_MONITOR | SK_VERB_NOTIFY |
1376 ((kring->ckr_tx == NR_TX) ? SK_VERB_TX : SK_VERB_RX),
1377 "kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x", kring->ckr_name,
1378 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1379 /*
1380 * ?xsync callbacks have tryget called by their callers,
1381 * but here we have to call it by ourself. If we can't
1382 * acquire the exclusive sync right, skip the sync.
1383 */
1384 if ((err = kr_enter(kring, FALSE)) == 0) {
1385 protect = sk_sync_protect();
1386 nx_mon_parent_rxsync(kring, p, NA_SYNCF_FORCE_READ);
1387 sk_sync_unprotect(protect);
1388 kr_exit(kring);
1389 }
1390 /* in all cases (even error), we must invoke notify */
1391 kring->ckr_mon_notify(kring, p, (NA_NOTEF_MONITOR | flags));
1392 return err;
1393 }
1394
1395 static int
nx_mon_na_activate(struct nexus_adapter * na,na_activate_mode_t mode)1396 nx_mon_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1397 {
1398 return nx_mon_na_activate_common(na, mode, FALSE /* no zcopy */);
1399 }
1400
1401 static void
nx_mon_na_dtor(struct nexus_adapter * na)1402 nx_mon_na_dtor(struct nexus_adapter *na)
1403 {
1404 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
1405 struct nexus_adapter *pna = mna->mna_pna;
1406
1407 SK_LOCK_ASSERT_HELD();
1408 ASSERT(na->na_type == NA_MONITOR);
1409
1410 if (pna != NULL) {
1411 (void) na_release_locked(pna);
1412 mna->mna_pna = NULL;
1413 }
1414 }
1415
1416 /* check if chr is a request for a monitor adapter that we can satisfy */
1417 int
nx_monitor_na_find(struct kern_nexus * nx,struct kern_channel * ch,struct chreq * chr,struct kern_channel * ch0,struct nxbind * nxb,struct proc * p,struct nexus_adapter ** na,boolean_t create)1418 nx_monitor_na_find(struct kern_nexus *nx, struct kern_channel *ch,
1419 struct chreq *chr, struct kern_channel *ch0, struct nxbind *nxb,
1420 struct proc *p, struct nexus_adapter **na, boolean_t create)
1421 {
1422 #pragma unused(ch)
1423 boolean_t zcopy = !!(chr->cr_mode & CHMODE_MONITOR_NO_COPY);
1424 struct nexus_adapter *pna = NULL; /* parent adapter */
1425 struct nexus_monitor_adapter *mna = NULL;
1426 char monsuff[10] = "";
1427 struct chreq pchr;
1428 uint32_t i;
1429 int error;
1430 enum txrx t;
1431
1432 SK_LOCK_ASSERT_HELD();
1433 *na = NULL;
1434
1435 #if SK_LOG
1436 uuid_string_t uuidstr;
1437 SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u "
1438 "ring_id %d ring_set %u ep_type %u:%u ch0 0x%llx create %u%s",
1439 chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr),
1440 (int)chr->cr_port, chr->cr_mode, CHMODE_BITS,
1441 chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set,
1442 chr->cr_real_endpoint, chr->cr_endpoint, SK_KVA(ch0), create,
1443 !(chr->cr_mode & CHMODE_MONITOR) ? " (skipped)" : "");
1444 #endif /* SK_LOG */
1445
1446 if (!(chr->cr_mode & CHMODE_MONITOR)) {
1447 return 0;
1448 }
1449
1450 /* XXX: Don't allow user packet pool mode in monitor for now */
1451 if (chr->cr_mode & CHMODE_USER_PACKET_POOL) {
1452 SK_ERR("User Packet pool mode not supported for monitor");
1453 return ENOTSUP;
1454 }
1455
1456 mna = na_mon_alloc(Z_WAITOK);
1457
1458 ASSERT(mna->mna_up.na_type == NA_MONITOR);
1459 ASSERT(mna->mna_up.na_free == na_mon_free);
1460
1461 /* override the ring set since we're monitoring */
1462 chr->cr_ring_set = RING_SET_ALL;
1463
1464 if (ch0 != NULL) {
1465 /*
1466 * We've been given the owning channel from ch_open();
1467 * use this as shortcut since otherwise we'd have to
1468 * find it ourselves.
1469 */
1470 #if (DEBUG || DEVELOPMENT)
1471 ASSERT(!(ch0->ch_info->cinfo_ch_mode & CHMODE_MONITOR));
1472 ASSERT(ch0->ch_info->cinfo_nx_port == chr->cr_port);
1473 #endif /* DEBUG || DEVELOPMENT */
1474 pna = ch0->ch_na;
1475 na_retain_locked(pna);
1476 } else {
1477 /*
1478 * First, try to find the adapter that we want to monitor
1479 * We use the same chr, after we have turned off the monitor
1480 * flags. In this way we can potentially monitor everything
1481 * skywalk understands, except other monitors.
1482 */
1483 memcpy(&pchr, chr, sizeof(pchr));
1484 pchr.cr_mode &= ~CHMODE_MONITOR;
1485 error = na_find(ch, nx, &pchr, ch0, nxb, p, &pna, create);
1486 if (error != 0) {
1487 SK_ERR("parent lookup failed: %d", error);
1488 return error;
1489 }
1490 }
1491 ASSERT(pna != NULL);
1492 SK_DF(SK_VERB_MONITOR,
1493 "found parent: \"%s\" (0x%llx)", pna->na_name, SK_KVA(pna));
1494
1495 if (!NA_IS_ACTIVE(pna)) {
1496 /* parent not in skywalk mode */
1497 /*
1498 * XXX we can wait for the parent to enter skywalk mode,
1499 * by intercepting its na_activate() callback (2014-03-16)
1500 */
1501 SK_ERR("parent \"%s\" (0x%llx) not in skywalk mode",
1502 pna->na_name, SK_KVA(pna));
1503 error = ENXIO;
1504 goto put_out;
1505 } else if (zcopy && NA_KERNEL_ONLY(pna)) {
1506 /*
1507 * Zero-copy mode requires the parent adapter to be
1508 * created in a non-kernel-only mode.
1509 */
1510 SK_ERR("parent \"%s\" (0x%llx) is in kernel-only mode",
1511 pna->na_name, SK_KVA(pna));
1512 error = ENODEV;
1513 goto put_out;
1514 }
1515
1516 /* grab all the rings we need in the parent */
1517 mna->mna_pna = pna;
1518 error = na_interp_ringid(pna, chr->cr_ring_id, chr->cr_ring_set,
1519 mna->mna_first, mna->mna_last);
1520 if (error != 0) {
1521 SK_ERR("ring_mode %u ring_id %d error %d", chr->cr_ring_set,
1522 (int)chr->cr_ring_id, error);
1523 goto put_out;
1524 }
1525 if (mna->mna_last[NR_TX] - mna->mna_first[NR_TX] == 1) {
1526 (void) snprintf(monsuff, 10, "-%u", mna->mna_first[NR_TX]);
1527 }
1528 (void) snprintf(mna->mna_up.na_name, sizeof(mna->mna_up.na_name),
1529 "%s%s/%s%s%s", pna->na_name, monsuff, zcopy ? "z" : "",
1530 (chr->cr_mode & CHMODE_MONITOR_TX) ? "r" : "",
1531 (chr->cr_mode & CHMODE_MONITOR_RX) ? "t" : "");
1532 uuid_generate_random(mna->mna_up.na_uuid);
1533
1534 /* these don't apply to the monitor adapter */
1535 *(nexus_stats_type_t *)(uintptr_t)&mna->mna_up.na_stats_type =
1536 NEXUS_STATS_TYPE_INVALID;
1537 *(uint32_t *)(uintptr_t)&mna->mna_up.na_flowadv_max = 0;
1538
1539 if (zcopy) {
1540 /*
1541 * Zero copy monitors need exclusive access
1542 * to the monitored rings.
1543 */
1544 for_rx_tx(t) {
1545 if (!(chr->cr_mode & nx_mon_txrx2chmode(t))) {
1546 continue;
1547 }
1548 for (i = mna->mna_first[t];
1549 i < mna->mna_last[t]; i++) {
1550 struct __kern_channel_ring *kring =
1551 &NAKR(pna, t)[i];
1552 if (kring->ckr_n_monitors > 0) {
1553 error = EBUSY;
1554 SK_ERR("kr \"%s\" already monitored "
1555 "by \"%s\"", kring->ckr_name,
1556 kring->ckr_monitors[0]->ckr_name);
1557 goto put_out;
1558 }
1559 }
1560 }
1561 mna->mna_up.na_activate = nx_mon_zcopy_na_activate;
1562 mna->mna_up.na_dtor = nx_mon_zcopy_na_dtor;
1563 /*
1564 * To have zero copy, we need to use the same memory allocator
1565 * as the monitored port.
1566 */
1567 mna->mna_up.na_arena = pna->na_arena;
1568 skmem_arena_retain((&mna->mna_up)->na_arena);
1569 atomic_bitset_32(&mna->mna_up.na_flags, NAF_MEM_LOANED);
1570 } else {
1571 /* normal monitors are incompatible with zero copy ones */
1572 for_rx_tx(t) {
1573 if (!(chr->cr_mode & nx_mon_txrx2chmode(t))) {
1574 continue;
1575 }
1576 for (i = mna->mna_first[t];
1577 i < mna->mna_last[t]; i++) {
1578 struct __kern_channel_ring *kring =
1579 &NAKR(pna, t)[i];
1580 if (kring->ckr_n_monitors > 0 &&
1581 KRNA(kring->ckr_monitors[0])->
1582 na_activate == nx_mon_zcopy_na_activate) {
1583 error = EBUSY;
1584 SK_ERR("kr \"%s\" is busy (zcopy)",
1585 kring->ckr_name);
1586 goto put_out;
1587 }
1588 }
1589 }
1590 mna->mna_up.na_activate = nx_mon_na_activate;
1591 mna->mna_up.na_dtor = nx_mon_na_dtor;
1592 /*
1593 * allocate a new (private) allocator instance using the
1594 * parent nexus configuration.
1595 */
1596 if ((error = nx_monitor_prov_s.nxdom_prov_mem_new(
1597 NX_DOM_PROV(nx), nx, &mna->mna_up)) != 0) {
1598 ASSERT(mna->mna_up.na_arena == NULL);
1599 goto put_out;
1600 }
1601 ASSERT(mna->mna_up.na_arena != NULL);
1602 mna->mna_up.na_rxsync = nx_mon_na_rxsync;
1603 }
1604 *(nexus_meta_type_t *)(uintptr_t)&mna->mna_up.na_md_type =
1605 pna->na_md_type;
1606 *(nexus_meta_subtype_t *)(uintptr_t)&mna->mna_up.na_md_subtype =
1607 pna->na_md_subtype;
1608
1609 /* a do-nothing txsync: monitors cannot be used to inject packets */
1610 mna->mna_up.na_txsync = nx_mon_na_txsync;
1611 mna->mna_up.na_rxsync = nx_mon_na_rxsync;
1612 mna->mna_up.na_krings_create = nx_mon_na_krings_create;
1613 mna->mna_up.na_krings_delete = nx_mon_na_krings_delete;
1614
1615 /*
1616 * We set the number of our na_rx_rings to be
1617 * max(na_num_tx_rings, na_num_rx_rings) in the parent
1618 */
1619 na_set_nrings(&mna->mna_up, NR_TX, na_get_nrings(pna, NR_TX));
1620 na_set_nrings(&mna->mna_up, NR_RX, na_get_nrings(pna, NR_RX));
1621 if (na_get_nrings(pna, NR_TX) > na_get_nrings(pna, NR_RX)) {
1622 na_set_nrings(&mna->mna_up, NR_RX, na_get_nrings(pna, NR_TX));
1623 }
1624 na_set_nslots(&mna->mna_up, NR_TX, na_get_nslots(pna, NR_TX));
1625 na_set_nslots(&mna->mna_up, NR_RX, na_get_nslots(pna, NR_RX));
1626
1627 na_attach_common(&mna->mna_up, nx, &nx_monitor_prov_s);
1628
1629 /* remember the traffic directions we have to monitor */
1630 mna->mna_mode = (chr->cr_mode & CHMODE_MONITOR);
1631
1632 /* keep the reference to the parent */
1633 *na = &mna->mna_up;
1634 na_retain_locked(*na);
1635
1636 /* sanity check: monitor and monitored adapters must share the nexus */
1637 ASSERT((*na)->na_nx == pna->na_nx);
1638
1639 #if SK_LOG
1640 SK_DF(SK_VERB_MONITOR, "created monitor adapter 0x%llx", SK_KVA(mna));
1641 SK_DF(SK_VERB_MONITOR, "na_name: \"%s\"", mna->mna_up.na_name);
1642 SK_DF(SK_VERB_MONITOR, " UUID: %s",
1643 sk_uuid_unparse(mna->mna_up.na_uuid, uuidstr));
1644 SK_DF(SK_VERB_MONITOR, " nx: 0x%llx (\"%s\":\"%s\")",
1645 SK_KVA(mna->mna_up.na_nx), NX_DOM(mna->mna_up.na_nx)->nxdom_name,
1646 NX_DOM_PROV(mna->mna_up.na_nx)->nxdom_prov_name);
1647 SK_DF(SK_VERB_MONITOR, " flags: 0x%b",
1648 mna->mna_up.na_flags, NAF_BITS);
1649 SK_DF(SK_VERB_MONITOR, " rings: tx %u rx %u",
1650 na_get_nrings(&mna->mna_up, NR_TX),
1651 na_get_nrings(&mna->mna_up, NR_RX));
1652 SK_DF(SK_VERB_MONITOR, " slots: tx %u rx %u",
1653 na_get_nslots(&mna->mna_up, NR_TX),
1654 na_get_nslots(&mna->mna_up, NR_RX));
1655 #if CONFIG_NEXUS_USER_PIPE
1656 SK_DF(SK_VERB_MONITOR, " next_pipe: %u", mna->mna_up.na_next_pipe);
1657 SK_DF(SK_VERB_MONITOR, " max_pipes: %u", mna->mna_up.na_max_pipes);
1658 #endif /* CONFIG_NEXUS_USER_PIPE */
1659 SK_DF(SK_VERB_MONITOR, " mna_tx_rings: [%u,%u)", mna->mna_first[NR_TX],
1660 mna->mna_last[NR_TX]);
1661 SK_DF(SK_VERB_MONITOR, " mna_rx_rings: [%u,%u)", mna->mna_first[NR_RX],
1662 mna->mna_last[NR_RX]);
1663 SK_DF(SK_VERB_MONITOR, " mna_mode: %u", mna->mna_mode);
1664 #endif /* SK_LOG */
1665
1666 return 0;
1667
1668 put_out:
1669 if (pna != NULL) {
1670 (void) na_release_locked(pna);
1671 pna = NULL;
1672 }
1673 NA_FREE(&mna->mna_up);
1674 return error;
1675 }
1676
1677 static void
nx_mon_quantum_copy_64x(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t unused_arg1,const uint16_t unused_arg2,const uint16_t unused_arg3,const boolean_t unused_arg4)1678 nx_mon_quantum_copy_64x(const enum txrx t, kern_packet_t dph,
1679 const uint16_t doff, kern_packet_t sph, const uint16_t soff,
1680 const uint32_t len, const boolean_t unused_arg1,
1681 const uint16_t unused_arg2, const uint16_t unused_arg3,
1682 const boolean_t unused_arg4)
1683 {
1684 /* for function prototype parity with pkt_copy_from_pkt_t */
1685 #pragma unused(unused_arg1, unused_arg2, unused_arg3, unused_arg4)
1686 #pragma unused(t, doff, soff)
1687 struct __kern_quantum *dqum = SK_PTR_ADDR_KQUM(dph);
1688 struct __kern_quantum *squm = SK_PTR_ADDR_KQUM(sph);
1689 uint8_t *sbuf, *dbuf;
1690
1691 ASSERT(METADATA_TYPE(squm) == NEXUS_META_TYPE_QUANTUM);
1692 ASSERT(METADATA_TYPE(squm) == METADATA_TYPE(dqum));
1693 VERIFY(IS_P2ALIGNED(len, 64));
1694
1695 MD_BUFLET_ADDR(squm, sbuf);
1696 MD_BUFLET_ADDR(dqum, dbuf);
1697 VERIFY(IS_P2ALIGNED(dbuf, sizeof(uint64_t)));
1698
1699 if (__probable(IS_P2ALIGNED(sbuf, sizeof(uint64_t)))) {
1700 sk_copy64_64x((uint64_t *)(void *)sbuf,
1701 (uint64_t *)(void *)dbuf, len);
1702 } else {
1703 bcopy(sbuf, dbuf, len);
1704 }
1705 /*
1706 * This copy routine only copies to/from a buflet, so the length
1707 * is guaranteed be <= the size of a buflet.
1708 */
1709 VERIFY(len <= UINT16_MAX);
1710 METADATA_SET_LEN(dqum, (uint16_t)len, 0);
1711 }
1712