xref: /xnu-10063.121.3/bsd/skywalk/core/skywalk.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <pexpert/pexpert.h>    /* for PE_parse_boot_argn */
31 #include <sys/codesign.h>       /* for csproc_get_platform_binary */
32 #include <sys/signalvar.h>      /* for psignal_with_reason */
33 #include <sys/reason.h>
34 #include <sys/kern_memorystatus.h>
35 #if CONFIG_MACF
36 #include <security/mac_framework.h>
37 #endif /* CONFIG_MACF */
38 
39 #ifndef htole16
40 #if BYTE_ORDER == LITTLE_ENDIAN
41 #define htole16(x)      ((uint16_t)(x))
42 #else /* BYTE_ORDER != LITTLE_ENDIAN */
43 #define htole16(x)      bswap16((x))
44 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
45 #endif /* htole16 */
46 
47 LCK_GRP_DECLARE(sk_lock_group, "sk_lock");
48 LCK_ATTR_DECLARE(sk_lock_attr, 0, 0);
49 LCK_MTX_DECLARE_ATTR(sk_lock, &sk_lock_group, &sk_lock_attr);
50 
51 static void skywalk_fini(void);
52 static int sk_priv_chk(proc_t, kauth_cred_t, int);
53 
54 static int __sk_inited = 0;
55 #if (DEVELOPMENT || DEBUG)
56 size_t sk_copy_thres = SK_COPY_THRES;
57 uint64_t sk_verbose;
58 #endif /* DEVELOPMENT || DEBUG */
59 uint32_t sk_debug;
60 uint64_t sk_features =
61 #if SKYWALK
62     SK_FEATURE_SKYWALK |
63 #endif
64 #if DEVELOPMENT
65     SK_FEATURE_DEVELOPMENT |
66 #endif
67 #if DEBUG
68     SK_FEATURE_DEBUG |
69 #endif
70 #if CONFIG_NEXUS_FLOWSWITCH
71     SK_FEATURE_NEXUS_FLOWSWITCH |
72 #endif
73 #if CONFIG_NEXUS_MONITOR
74     SK_FEATURE_NEXUS_MONITOR |
75 #endif
76 #if CONFIG_NEXUS_NETIF
77     SK_FEATURE_NEXUS_NETIF |
78 #endif
79 #if CONFIG_NEXUS_USER_PIPE
80     SK_FEATURE_NEXUS_USER_PIPE |
81 #endif
82 #if CONFIG_NEXUS_KERNEL_PIPE
83     SK_FEATURE_NEXUS_KERNEL_PIPE |
84 #endif
85 #if CONFIG_NEXUS_KERNEL_PIPE && (DEVELOPMENT || DEBUG)
86     SK_FEATURE_NEXUS_KERNEL_PIPE_LOOPBACK |
87 #endif
88 #if (DEVELOPMENT || DEBUG)
89     SK_FEATURE_DEV_OR_DEBUG |
90 #endif
91     0;
92 
93 uint32_t sk_opp_defunct = 0;    /* opportunistic defunct */
94 
95 /* checksum offload is generic to any nexus (not specific to flowswitch) */
96 uint32_t sk_cksum_tx = 1;       /* advertise outbound offload */
97 uint32_t sk_cksum_rx = 1;       /* perform inbound checksum offload */
98 
99 /* guard pages */
100 uint32_t sk_guard = 0;          /* guard pages (0: disable) */
101 #define SK_GUARD_MIN    1       /* minimum # of guard pages */
102 #define SK_GUARD_MAX    4       /* maximum # of guard pages */
103 uint32_t sk_headguard_sz = SK_GUARD_MIN; /* # of leading guard pages */
104 uint32_t sk_tailguard_sz = SK_GUARD_MIN; /* # of trailing guard pages */
105 
#if (DEVELOPMENT || DEBUG)
/* ring-size overrides; 0 means "use the built-in default" */
uint32_t sk_txring_sz = 0;      /* flowswitch TX ring size */
uint32_t sk_rxring_sz = 0;      /* flowswitch RX ring size */
uint32_t sk_net_txring_sz = 0;  /* netif adapter TX ring size */
uint32_t sk_net_rxring_sz = 0;  /* netif adapter RX ring size */
uint32_t sk_min_pool_size = 0;  /* minimum packet pool size */
#endif /* DEVELOPMENT || DEBUG */
113 
114 uint32_t sk_max_flows = NX_FLOWADV_DEFAULT;
115 uint32_t sk_fadv_nchunks;       /* # of FO_FLOWADV_CHUNK in bitmap */
116 uint32_t sk_netif_compat_txmodel = NETIF_COMPAT_TXMODEL_DEFAULT;
117 uint32_t sk_netif_native_txmodel = NETIF_NATIVE_TXMODEL_DEFAULT;
118 /*
119  * Configures the RX aggregation logic for TCP in flowswitch.
120  * A non-zero value enables the aggregation logic, with the maximum
121  * aggregation length (in bytes) limited to this value.
122  *
123  * DO NOT increase beyond 16KB. If you do, we end up corrupting the data-stream
124  * as we create aggregate-mbufs with a pktlen > 16KB but only a single element.
125  */
126 uint32_t sk_fsw_rx_agg_tcp = 16384;
127 
128 /*
129  * Forces the RX host path to use or not use aggregation, regardless of the
130  * existence of filters (see sk_fsw_rx_agg_tcp_host_t for valid values).
131  */
132 uint32_t sk_fsw_rx_agg_tcp_host = SK_FSW_RX_AGG_TCP_HOST_AUTO;
133 
134 /*
135  * Configures the skywalk infrastructure for handling TCP TX aggregation.
136  * A non-zero value enables the support.
137  */
138 uint32_t sk_fsw_tx_agg_tcp = 1;
139 /*
140  * Configuration to limit the number of buffers for flowswitch VP channel.
141  */
142 uint32_t sk_fsw_max_bufs = 0;
143 /*
144  * GSO MTU for the channel path:
145  *   > 0: enable GSO and use value as the largest supported segment size
146  *  == 0: disable GSO
147  */
148 uint32_t sk_fsw_gso_mtu = 16 * 1024;
149 
150 /* list of interfaces that allow direct open from userspace */
151 #define SK_NETIF_DIRECT_MAX     8
152 char sk_netif_direct[SK_NETIF_DIRECT_MAX][IFXNAMSIZ];
153 uint32_t sk_netif_direct_cnt = 0;
154 
155 uint16_t sk_tx_delay_qlen = 16;                 /* packets */
156 uint16_t sk_tx_delay_timeout = (1 * 1000);        /* microseconds */
157 
158 #define SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ     64
159 #define SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ     64
160 uint32_t sk_netif_compat_aux_cell_tx_ring_sz =
161     SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ;
162 uint32_t sk_netif_compat_aux_cell_rx_ring_sz =
163     SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ;
164 
165 /* Wi-Fi Access Point */
166 #define SK_NETIF_COMPAT_WAP_TX_RING_SZ  128
167 #define SK_NETIF_COMPAT_WAP_RX_RING_SZ  128
168 uint32_t sk_netif_compat_wap_tx_ring_sz = SK_NETIF_COMPAT_WAP_TX_RING_SZ;
169 uint32_t sk_netif_compat_wap_rx_ring_sz = SK_NETIF_COMPAT_WAP_RX_RING_SZ;
170 
171 /* AWDL */
172 #define SK_NETIF_COMPAT_AWDL_TX_RING_SZ 128
173 #define SK_NETIF_COMPAT_AWDL_RX_RING_SZ 128
174 uint32_t sk_netif_compat_awdl_tx_ring_sz = SK_NETIF_COMPAT_AWDL_TX_RING_SZ;
175 uint32_t sk_netif_compat_awdl_rx_ring_sz = SK_NETIF_COMPAT_AWDL_RX_RING_SZ;
176 
177 /* Wi-Fi Infrastructure */
178 #define SK_NETIF_COMPAT_WIF_TX_RING_SZ  128
179 #define SK_NETIF_COMPAT_WIF_RX_RING_SZ  128
180 uint32_t sk_netif_compat_wif_tx_ring_sz = SK_NETIF_COMPAT_WIF_TX_RING_SZ;
181 uint32_t sk_netif_compat_wif_rx_ring_sz = SK_NETIF_COMPAT_WIF_RX_RING_SZ;
182 
183 #define SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ      128
184 #define SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ      128
185 uint32_t sk_netif_compat_usb_eth_tx_ring_sz =
186     SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ;
187 uint32_t sk_netif_compat_usb_eth_rx_ring_sz =
188     SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ;
189 
190 #define SK_NETIF_COMPAT_RX_MBQ_LIMIT    8192
191 int sk_netif_compat_rx_mbq_limit = SK_NETIF_COMPAT_RX_MBQ_LIMIT;
192 
193 uint32_t sk_netif_tx_mit = SK_NETIF_MIT_AUTO;
194 uint32_t sk_netif_rx_mit = SK_NETIF_MIT_AUTO;
195 char sk_ll_prefix[IFNAMSIZ] = "llw";
196 uint32_t sk_rx_sync_packets = 1;
197 uint32_t sk_channel_buflet_alloc = 0;
198 
199 SYSCTL_NODE(_kern, OID_AUTO, skywalk, CTLFLAG_RW | CTLFLAG_LOCKED,
200     0, "Skywalk parameters");
201 SYSCTL_NODE(_kern_skywalk, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_LOCKED,
202     0, "Skywalk statistics");
203 
204 SYSCTL_OPAQUE(_kern_skywalk, OID_AUTO, features, CTLFLAG_RD | CTLFLAG_LOCKED,
205     &sk_features, sizeof(sk_features), "-", "Skywalk features");
206 
#if (DEVELOPMENT || DEBUG)
/* debug-only tunables exposed under kern.skywalk.* */
SYSCTL_QUAD(_kern_skywalk, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_verbose, "Skywalk verbose mode");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_debug, 0, "Skywalk debug mode");
SYSCTL_LONG(_kern_skywalk, OID_AUTO, sk_copy_thres, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_copy_thres, "Skywalk copy threshold");
/* when 0, sk_priv_chk() ignores privilege-check failures (debug only) */
static int __priv_check = 1;
SYSCTL_INT(_kern_skywalk, OID_AUTO, priv_check, CTLFLAG_RW | CTLFLAG_LOCKED,
    &__priv_check, 0, "Skywalk privilege check");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_opp_defunct, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_opp_defunct, 0, "Defunct opportunistically");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_tx, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_cksum_tx, 0, "Advertise (and perform) outbound checksum offload");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_rx, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_cksum_rx, 0, "Perform inbound checksum offload");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_rx_sync_packets, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_rx_sync_packets, 0, "Enable RX sync packets");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, chan_buf_alloc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sk_channel_buflet_alloc, 0,
    "channel buflet allocation (enable/disable)");
#endif /* DEVELOPMENT || DEBUG */
229 
#if (DEVELOPMENT || DEBUG)
/* error-injection rate mask (debug only) */
uint32_t sk_inject_error_rmask = 0x3;
/*
 * NOTE(review): the 6th argument (0x3) is the sysctl's fallback value,
 * normally 0 when a variable pointer is supplied — presumably harmless
 * since &sk_inject_error_rmask is non-NULL, but confirm intent.
 */
SYSCTL_UINT(_kern_skywalk, OID_AUTO, inject_error_rmask,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sk_inject_error_rmask, 0x3, "");
#endif /* DEVELOPMENT || DEBUG */
235 
236 #if (DEVELOPMENT || DEBUG)
237 static void skywalk_self_tests(void);
238 #endif /* (DEVELOPMENT || DEBUG) */
239 
240 #define SKMEM_TAG_SYSCTL_BUF "com.apple.skywalk.sysctl_buf"
241 SKMEM_TAG_DEFINE(skmem_tag_sysctl_buf, SKMEM_TAG_SYSCTL_BUF);
242 
243 #define SKMEM_TAG_OID       "com.apple.skywalk.skoid"
244 SKMEM_TAG_DEFINE(skmem_tag_oid, SKMEM_TAG_OID);
245 
246 #if (SK_LOG || DEVELOPMENT || DEBUG)
247 #define SKMEM_TAG_DUMP  "com.apple.skywalk.dump"
248 static SKMEM_TAG_DEFINE(skmem_tag_dump, SKMEM_TAG_DUMP);
249 
250 static char *sk_dump_buf;
251 #define SK_DUMP_BUF_SIZE        2048
252 #define SK_DUMP_BUF_ALIGN       16
253 #endif /* (SK_LOG || DEVELOPMENT || DEBUG) */
254 
__startup_func
void
__sk_tag_make(const struct sk_tag_spec *spec)
{
	/*
	 * Startup-time helper for SKMEM_TAG_DEFINE(): allocate a kernel
	 * allocation-name tag for spec->skt_name and store the handle in
	 * the variable designated by spec->skt_var.
	 */
	*spec->skt_var = kern_allocation_name_allocate(spec->skt_name, 0);
}
261 
262 boolean_t
skywalk_netif_direct_enabled(void)263 skywalk_netif_direct_enabled(void)
264 {
265 	return sk_netif_direct_cnt > 0;
266 }
267 
268 boolean_t
skywalk_netif_direct_allowed(const char * ifname)269 skywalk_netif_direct_allowed(const char *ifname)
270 {
271 	uint32_t i;
272 
273 	for (i = 0; i < sk_netif_direct_cnt; i++) {
274 		if (strncmp(ifname, sk_netif_direct[i], IFXNAMSIZ) == 0) {
275 			return TRUE;
276 		}
277 	}
278 	return FALSE;
279 }
280 
281 #if (DEVELOPMENT || DEBUG)
282 static void
parse_netif_direct(void)283 parse_netif_direct(void)
284 {
285 	char buf[(IFXNAMSIZ + 1) * SK_NETIF_DIRECT_MAX];
286 	size_t i, curr, len, iflen;
287 
288 	if (!PE_parse_boot_arg_str("sk_netif_direct", buf, sizeof(buf))) {
289 		return;
290 	}
291 
292 	curr = 0;
293 	len = strlen(buf);
294 	for (i = 0; i < len + 1 &&
295 	    sk_netif_direct_cnt < SK_NETIF_DIRECT_MAX; i++) {
296 		if (buf[i] != ',' && buf[i] != '\0') {
297 			continue;
298 		}
299 
300 		buf[i] = '\0';
301 		iflen = i - curr;
302 		if (iflen > 0 && iflen < IFXNAMSIZ) {
303 			(void) strlcpy(sk_netif_direct[sk_netif_direct_cnt],
304 			    buf + curr, IFXNAMSIZ);
305 			sk_netif_direct_cnt++;
306 		}
307 		curr = i + 1;
308 	}
309 }
310 #endif /* DEVELOPMENT || DEBUG */
311 
/*
 * Tear down the Skywalk subsystem; called with sk_lock held, either at
 * unload time or on a failed skywalk_init().  Subsystems are torn down
 * in reverse order of their initialization in skywalk_init().
 */
static void
skywalk_fini(void)
{
	SK_LOCK_ASSERT_HELD();

	if (__sk_inited) {
#if (DEVELOPMENT || DEBUG)
		skmem_test_fini();
		cht_test_fini();
#endif /* (DEVELOPMENT || DEBUG) */
		channel_fini();
		nexus_fini();
		skmem_fini();
		flowidns_fini();

#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* release the shared dump buffer allocated in skywalk_init() */
		if (sk_dump_buf != NULL) {
			sk_free_data(sk_dump_buf, SK_DUMP_BUF_SIZE);
			sk_dump_buf = NULL;
		}
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		__sk_inited = 0;
	}
}
337 
/*
 * One-time initialization of the Skywalk subsystem.
 *
 * On DEVELOPMENT/DEBUG kernels, most tunables may be overridden via
 * boot-args first; the values are then sanity-clamped, guard-page sizes
 * are chosen, and the memory/nexus/channel subsystems are brought up
 * under sk_lock.  Returns 0 on success, or the error from nexus/channel
 * initialization (in which case everything is torn down again).
 */
int
skywalk_init(void)
{
	int error;

	VERIFY(!__sk_inited);

	_CASSERT(sizeof(kern_packet_t) == sizeof(uint64_t));
	_CASSERT(sizeof(bitmap_t) == sizeof(uint64_t));

#if (DEVELOPMENT || DEBUG)
	/* boot-arg overrides (debug kernels only) */
	PE_parse_boot_argn("sk_debug", &sk_debug, sizeof(sk_debug));
	PE_parse_boot_argn("sk_verbose", &sk_verbose, sizeof(sk_verbose));
	(void) PE_parse_boot_argn("sk_opp_defunct", &sk_opp_defunct,
	    sizeof(sk_opp_defunct));
	(void) PE_parse_boot_argn("sk_cksum_tx", &sk_cksum_tx,
	    sizeof(sk_cksum_tx));
	(void) PE_parse_boot_argn("sk_cksum_rx", &sk_cksum_rx,
	    sizeof(sk_cksum_rx));
	(void) PE_parse_boot_argn("sk_txring_sz", &sk_txring_sz,
	    sizeof(sk_txring_sz));
	(void) PE_parse_boot_argn("sk_rxring_sz", &sk_rxring_sz,
	    sizeof(sk_rxring_sz));
	(void) PE_parse_boot_argn("sk_net_txring_sz", &sk_net_txring_sz,
	    sizeof(sk_net_txring_sz));
	(void) PE_parse_boot_argn("sk_net_rxring_sz", &sk_net_rxring_sz,
	    sizeof(sk_net_rxring_sz));
	(void) PE_parse_boot_argn("sk_max_flows", &sk_max_flows,
	    sizeof(sk_max_flows));
	(void) PE_parse_boot_argn("sk_native_txmodel", &sk_netif_native_txmodel,
	    sizeof(sk_netif_native_txmodel));
	(void) PE_parse_boot_argn("sk_compat_txmodel", &sk_netif_compat_txmodel,
	    sizeof(sk_netif_compat_txmodel));
	(void) PE_parse_boot_argn("sk_tx_delay_qlen", &sk_tx_delay_qlen,
	    sizeof(sk_tx_delay_qlen));
	/*
	 * NOTE(review): boot-arg name "sk_ts_delay_timeout" looks like a
	 * typo for "sk_tx_delay_timeout" (it fills sk_tx_delay_timeout);
	 * kept as-is since existing setups may rely on the current name.
	 */
	(void) PE_parse_boot_argn("sk_ts_delay_timeout", &sk_tx_delay_timeout,
	    sizeof(sk_tx_delay_timeout));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_tx_ring_sz",
	    &sk_netif_compat_aux_cell_tx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_rx_ring_sz",
	    &sk_netif_compat_aux_cell_rx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_tx_ring_sz",
	    &sk_netif_compat_wap_tx_ring_sz,
	    sizeof(sk_netif_compat_wap_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_rx_ring_sz",
	    &sk_netif_compat_wap_rx_ring_sz,
	    sizeof(sk_netif_compat_wap_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_tx_ring_sz",
	    &sk_netif_compat_awdl_tx_ring_sz,
	    sizeof(sk_netif_compat_awdl_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_rx_ring_sz",
	    &sk_netif_compat_awdl_rx_ring_sz,
	    sizeof(sk_netif_compat_awdl_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_tx_ring_sz",
	    &sk_netif_compat_wif_tx_ring_sz,
	    sizeof(sk_netif_compat_wif_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_rx_ring_sz",
	    &sk_netif_compat_wif_rx_ring_sz,
	    sizeof(sk_netif_compat_wif_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_tx_ring_sz",
	    &sk_netif_compat_usb_eth_tx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_rx_ring_sz",
	    &sk_netif_compat_usb_eth_rx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_rx_mbq_limit",
	    &sk_netif_compat_rx_mbq_limit, sizeof(sk_netif_compat_rx_mbq_limit));
	(void) PE_parse_boot_argn("sk_netif_tx_mit",
	    &sk_netif_tx_mit, sizeof(sk_netif_tx_mit));
	(void) PE_parse_boot_argn("sk_netif_rx_mit",
	    &sk_netif_rx_mit, sizeof(sk_netif_rx_mit));
	(void) PE_parse_boot_arg_str("sk_ll_prefix", sk_ll_prefix,
	    sizeof(sk_ll_prefix));
	parse_netif_direct();
	(void) PE_parse_boot_argn("sk_fsw_rx_agg_tcp", &sk_fsw_rx_agg_tcp,
	    sizeof(sk_fsw_rx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_tx_agg_tcp", &sk_fsw_tx_agg_tcp,
	    sizeof(sk_fsw_tx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_gso_mtu", &sk_fsw_gso_mtu,
	    sizeof(sk_fsw_gso_mtu));
	(void) PE_parse_boot_argn("sk_fsw_max_bufs", &sk_fsw_max_bufs,
	    sizeof(sk_fsw_max_bufs));
	(void) PE_parse_boot_argn("sk_rx_sync_packets", &sk_rx_sync_packets,
	    sizeof(sk_rx_sync_packets));
	(void) PE_parse_boot_argn("sk_chan_buf_alloc", &sk_channel_buflet_alloc,
	    sizeof(sk_channel_buflet_alloc));
	(void) PE_parse_boot_argn("sk_guard", &sk_guard, sizeof(sk_guard));
	(void) PE_parse_boot_argn("sk_headguard_sz", &sk_headguard_sz,
	    sizeof(sk_headguard_sz));
	(void) PE_parse_boot_argn("sk_tailguard_sz", &sk_tailguard_sz,
	    sizeof(sk_tailguard_sz));
	(void) PE_parse_boot_argn("sk_min_pool_size", &sk_min_pool_size,
	    sizeof(sk_min_pool_size));
#endif /* DEVELOPMENT || DEBUG */

	/* clamp the flow-advisory table size to [1, NX_FLOWADV_MAX] */
	if (sk_max_flows == 0) {
		sk_max_flows = NX_FLOWADV_DEFAULT;
	} else if (sk_max_flows > NX_FLOWADV_MAX) {
		sk_max_flows = NX_FLOWADV_MAX;
	}

	/* clamp mitigation modes to the highest supported value */
	if (sk_netif_tx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_tx_mit = SK_NETIF_MIT_MAX;
	}
	if (sk_netif_rx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_rx_mit = SK_NETIF_MIT_MAX;
	}

	/* number of FO_FLOWADV_CHUNK-sized chunks covering sk_max_flows */
	sk_fadv_nchunks = (uint32_t)P2ROUNDUP(sk_max_flows, FO_FLOWADV_CHUNK) /
	    FO_FLOWADV_CHUNK;

	/*
	 * Pick guard-page counts: when enabled and unspecified (0), a
	 * randomized size in [0, SK_GUARD_MAX] is chosen, then clamped
	 * to [SK_GUARD_MIN, SK_GUARD_MAX]; when disabled, both default
	 * to SK_GUARD_MIN.
	 */
	if (sk_guard) {
		uint32_t sz;
		/* leading guard page(s) */
		if (sk_headguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_headguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_headguard_sz > SK_GUARD_MAX) {
			sk_headguard_sz = SK_GUARD_MAX;
		}
		if (sk_headguard_sz < SK_GUARD_MIN) {
			sk_headguard_sz = SK_GUARD_MIN;
		}
		/* trailing guard page(s) */
		if (sk_tailguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_tailguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_tailguard_sz > SK_GUARD_MAX) {
			sk_tailguard_sz = SK_GUARD_MAX;
		}
		if (sk_tailguard_sz < SK_GUARD_MIN) {
			sk_tailguard_sz = SK_GUARD_MIN;
		}
	} else {
		sk_headguard_sz = sk_tailguard_sz = SK_GUARD_MIN;
	}
	ASSERT(sk_headguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_headguard_sz <= SK_GUARD_MAX);
	ASSERT(sk_tailguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_tailguard_sz <= SK_GUARD_MAX);

	__sk_inited = 1;

	/* bring up memory, nexus and channel subsystems, in that order */
	SK_LOCK();
	skmem_init();
	error = nexus_init();
	if (error == 0) {
		error = channel_init();
	}
	if (error != 0) {
		/* partial failure: unwind everything set up so far */
		skywalk_fini();
	}
	SK_UNLOCK();

	if (error == 0) {
#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* allocate space for sk_dump_buf */
		sk_dump_buf = sk_alloc_data(SK_DUMP_BUF_SIZE, Z_WAITOK | Z_NOFAIL,
		    skmem_tag_dump);
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		/* namespace registries (ports, protocols, flow IDs) */
		netns_init();
		protons_init();
		flowidns_init();

#if (DEVELOPMENT || DEBUG)
		skywalk_self_tests();
		skmem_test_init();
		cht_test_init();
#endif /* DEVELOPMENT || DEBUG */
	}

	return error;
}
514 
515 /*
516  * csproc_get_platform_binary() returns non-zero if the process is platform
517  * code, which means that it is considered part of the Operating System.
518  * On iOS, that means it's contained in the trust cache or a loaded one.
519  * On macOS, everything signed by B&I is currently platform code, but the
520  * policy in general is subject to change.  Thus this is an approximate.
521  */
522 boolean_t
skywalk_check_platform_binary(proc_t p)523 skywalk_check_platform_binary(proc_t p)
524 {
525 	return (csproc_get_platform_binary(p) == 0) ? FALSE : TRUE;
526 }
527 
/*
 * Check a single Skywalk privilege against the given credential.
 * Returns 0 when the privilege is granted, EPERM when no credential is
 * supplied, or the error from priv_check_cred() otherwise.  On
 * DEVELOPMENT/DEBUG kernels, failures are logged and may be ignored
 * entirely when the kern.skywalk.priv_check sysctl is cleared.
 */
static int
sk_priv_chk(proc_t p, kauth_cred_t cred, int priv)
{
#pragma unused(p)
	int ret = EPERM;

	if (cred != NULL) {
		ret = priv_check_cred(cred, priv, 0);
	}
#if (DEVELOPMENT || DEBUG)
	if (ret != 0) {
		/* map the privilege code to its entitlement string for logging */
		const char *pstr;

		switch (priv) {
		case PRIV_SKYWALK_REGISTER_USER_PIPE:
			pstr = "com.apple.private.skywalk.register-user-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_KERNEL_PIPE:
			pstr = "com.apple.private.skywalk.register-kernel-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_NET_IF:
			pstr = "com.apple.private.skywalk.register-net-if";
			break;

		case PRIV_SKYWALK_REGISTER_FLOW_SWITCH:
			pstr = "com.apple.private.skywalk.register-flow-switch";
			break;

		case PRIV_SKYWALK_OBSERVE_ALL:
			pstr = "com.apple.private.skywalk.observe-all";
			break;

		case PRIV_SKYWALK_OBSERVE_STATS:
			pstr = "com.apple.private.skywalk.observe-stats";
			break;

		case PRIV_SKYWALK_LOW_LATENCY_CHANNEL:
			pstr = "com.apple.private.skywalk.low-latency-channel";
			break;

		default:
			pstr = "unknown";
			break;
		}

#if SK_LOG
		if (__priv_check) {
			SK_DF(SK_VERB_PRIV, "%s(%d) insufficient privilege %d "
			    "(\"%s\") err %d", sk_proc_name_address(p),
			    sk_proc_pid(p), priv, pstr, ret);
		} else {
			SK_DF(SK_VERB_PRIV, "%s(%d) IGNORING missing privilege "
			    "%d (\"%s\") err %d", sk_proc_name_address(p),
			    sk_proc_pid(p), priv, pstr, ret);
		}
#endif /* SK_LOG */

		/* ignore privilege check failures if requested */
		if (!__priv_check) {
			ret = 0;
		}
	}
#endif /* DEVELOPMENT || DEBUG */

	return ret;
}
596 
/*
 * Public wrapper for sk_priv_chk(); see that function for the return
 * value semantics.
 */
int
skywalk_priv_check_cred(proc_t p, kauth_cred_t cred, int priv)
{
	return sk_priv_chk(p, cred, priv);
}
602 
603 #if CONFIG_MACF
604 int
skywalk_mac_system_check_proc_cred(proc_t p,const char * info_type)605 skywalk_mac_system_check_proc_cred(proc_t p, const char *info_type)
606 {
607 	int ret;
608 	kauth_cred_t cred = kauth_cred_proc_ref(p);
609 	ret = mac_system_check_info(cred, info_type);
610 	kauth_cred_unref(&cred);
611 
612 	return ret;
613 }
614 #endif /* CONFIG_MACF */
615 
616 /*
617  * Scan thru the list of privileges needed before we allow someone
618  * to open a handle to the Nexus controller.  This should be done
619  * at nxctl_create() time, and additional privilege check specific
620  * to the operation (e.g. register, etc.) should be done afterwards.
621  */
622 int
skywalk_nxctl_check_privileges(proc_t p,kauth_cred_t cred)623 skywalk_nxctl_check_privileges(proc_t p, kauth_cred_t cred)
624 {
625 	int ret = 0;
626 
627 	if (p == kernproc) {
628 		goto done;
629 	}
630 
631 	do {
632 		/*
633 		 * Check for observe-{stats,all} entitlements first
634 		 * before the rest, to account for nexus controller
635 		 * clients that don't need anything but statistics;
636 		 * it would help quiesce sandbox violation warnings.
637 		 */
638 		if ((ret = sk_priv_chk(p, cred,
639 		    PRIV_SKYWALK_OBSERVE_STATS)) == 0) {
640 			break;
641 		}
642 		if ((ret = sk_priv_chk(p, cred,
643 		    PRIV_SKYWALK_OBSERVE_ALL)) == 0) {
644 			break;
645 		}
646 		if ((ret = sk_priv_chk(p, cred,
647 		    PRIV_SKYWALK_REGISTER_USER_PIPE)) == 0) {
648 			break;
649 		}
650 		if ((ret = sk_priv_chk(p, cred,
651 		    PRIV_SKYWALK_REGISTER_KERNEL_PIPE)) == 0) {
652 			break;
653 		}
654 		if ((ret = sk_priv_chk(p, cred,
655 		    PRIV_SKYWALK_REGISTER_NET_IF)) == 0) {
656 			break;
657 		}
658 		if ((ret = sk_priv_chk(p, cred,
659 		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH)) == 0) {
660 			break;
661 		}
662 		/* none set, so too bad */
663 		ret = EPERM;
664 	} while (0);
665 
666 #if (DEVELOPMENT || DEBUG)
667 	if (ret != 0) {
668 		SK_ERR("%s(%d) insufficient privilege to open nexus controller "
669 		    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ret);
670 	}
671 #endif /* !DEVELOPMENT && !DEBUG */
672 done:
673 	return ret;
674 }
675 
/*
 * Generate a 64-bit guard identifier for a channel (isch == TRUE) or a
 * nexus, derived from a random word, the first two bytes of the given
 * UUID, and a type signature, and copy it into *guard.
 */
void
sk_gen_guard_id(boolean_t isch, const uuid_t uuid, guardid_t *guard)
{
#define GUARD_CH_SIG    0x4348  /* 'CH' */
#define GUARD_NX_SIG    0x4e58  /* 'NX' */
	union {
		uint8_t         _u8[8];
		uint16_t        _u16[4];
		uint64_t        _u64;
	} __u;

	/* word 0: random; word 1: first two bytes of the UUID */
	read_random(&__u._u16[0], sizeof(uint16_t));
	bcopy(uuid, (void *)&__u._u16[1], sizeof(uint16_t));
	/* word 2: object-type signature; word 3: 'SK' marker */
	__u._u16[2] = htole16(isch ? GUARD_CH_SIG : GUARD_NX_SIG);
	__u._u16[3] = htole16(0x534b);  /* 'SK' */
	/* word 3 is always non-zero, so the whole id can never be 0 */
	VERIFY(__u._u64 != 0);

	/* copy out at most 8 bytes, zero-filling any remainder of *guard */
	bzero(guard, sizeof(*guard));
	bcopy((void *)&__u._u64, guard, MIN(sizeof(*guard),
	    sizeof(uint64_t)));
}
697 
698 
/*
 * Render the UUID into the caller-supplied string buffer (upper-case
 * form) and return that buffer, for convenient use in log statements.
 */
extern const char *
sk_uuid_unparse(const uuid_t uu, uuid_string_t out)
{
	uuid_unparse_upper(uu, out);
	return (const char *)out;
}
705 
706 #if SK_LOG
707 /*
708  * packet-dump function, user-supplied or static buffer.
709  * The destination buffer must be at least 30+4*len
710  *
711  * @param p
712  *   buffer to be dumped.
713  * @param len
714  *   buffer's total length.
715  * @param dumplen
716  *   length to be dumped.
717  * @param dst
718  *   destination char buffer. sk_dump_buf would be used if dst is NULL.
719  * @param lim
720  *   destination char buffer max length. Not used if dst is NULL.
721  *
722  */
const char *
sk_dump(const char *label, const void *obj, int len, int dumplen,
    char *dst, int lim)
{
	int i, j, i0, n = 0;
	static char hex[] = "0123456789abcdef";
	const char *p = obj;    /* dump cursor */
	char *o;        /* output position */

/* high/low nibble to hex digit; non-printables render as '.' */
#define P_HI(x) hex[((x) & 0xf0) >> 4]
#define P_LO(x) hex[((x) & 0xf)]
#define P_C(x)  ((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (lim <= 0 || lim > len) {
		lim = len;
	}
	if (dst == NULL) {
		/* fall back to the shared buffer set up in skywalk_init() */
		dst = sk_dump_buf;
		lim = SK_DUMP_BUF_SIZE;
	}
	dumplen = MIN(len, dumplen);
	o = dst;
	n += scnprintf(o, lim, "%s 0x%llx len %d lim %d\n", label,
	    SK_KVA(p), len, lim);
	o += strlen(o);
	/*
	 * hexdump routine: each row is "ofs: <16 hex pairs> <16 ASCII>".
	 * NOTE(review): the row writes (memset of 48 plus the ASCII
	 * column) are not bounded by lim; presumably callers honor the
	 * "at least 30+4*len" contract above — verify before reuse.
	 */
	for (i = 0; i < dumplen;) {
		n += scnprintf(o, lim - n, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);
		i0 = i;
		/* hex columns: 3 chars per byte, up to 16 bytes per row */
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j * 3] = P_HI(p[i]);
			o[j * 3 + 1] = P_LO(p[i]);
		}
		/* rewind and emit the ASCII column for the same 16 bytes */
		i = i0;
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j + 48] = P_C(p[i]);
		}
		o[j + 48] = '\n';
		o += j + 49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
770 
771 /*
772  * "Safe" variant of proc_name_address(), mean to be used only for logging.
773  */
774 const char *
sk_proc_name_address(struct proc * p)775 sk_proc_name_address(struct proc *p)
776 {
777 	if (p == PROC_NULL) {
778 		return "proc_null";
779 	}
780 
781 	return proc_name_address(p);
782 }
783 
784 /*
785  * "Safe" variant of proc_pid(), mean to be used only for logging.
786  */
787 int
sk_proc_pid(struct proc * p)788 sk_proc_pid(struct proc *p)
789 {
790 	if (p == PROC_NULL) {
791 		return -1;
792 	}
793 
794 	return proc_pid(p);
795 }
796 
797 const char *
sk_sa_ntop(struct sockaddr * sa,char * addr_str,size_t addr_strlen)798 sk_sa_ntop(struct sockaddr *sa, char *addr_str, size_t addr_strlen)
799 {
800 	const char *str = NULL;
801 
802 	addr_str[0] = '\0';
803 
804 	switch (sa->sa_family) {
805 	case AF_INET:
806 		str = inet_ntop(AF_INET, &SIN(sa)->sin_addr.s_addr,
807 		    addr_str, (socklen_t)addr_strlen);
808 		break;
809 
810 	case AF_INET6:
811 		str = inet_ntop(AF_INET6, &SIN6(sa)->sin6_addr,
812 		    addr_str, (socklen_t)addr_strlen);
813 		break;
814 
815 	default:
816 		str = addr_str;
817 		break;
818 	}
819 
820 	return str;
821 }
822 
823 const char *
sk_memstatus2str(uint32_t status)824 sk_memstatus2str(uint32_t status)
825 {
826 	const char *str = NULL;
827 
828 	switch (status) {
829 	case kMemorystatusInvalid:
830 		str = "kMemorystatusInvalid";
831 		break;
832 
833 	case kMemorystatusKilled:
834 		str = "kMemorystatusKilled";
835 		break;
836 
837 	case kMemorystatusKilledHiwat:
838 		str = "kMemorystatusKilledHiwat";
839 		break;
840 
841 	case kMemorystatusKilledVnodes:
842 		str = "kMemorystatusKilledVnodes";
843 		break;
844 
845 	case kMemorystatusKilledVMPageShortage:
846 		str = "kMemorystatusKilledVMPageShortage";
847 		break;
848 
849 	case kMemorystatusKilledProcThrashing:
850 		str = "kMemorystatusKilledProcThrashing";
851 		break;
852 
853 	case kMemorystatusKilledVMCompressorThrashing:
854 		str = "kMemorystatusKilledVMCompressorThrashing";
855 		break;
856 
857 	case kMemorystatusKilledVMCompressorSpaceShortage:
858 		str = "kMemorystatusKilledVMCompressorSpaceShortage";
859 		break;
860 
861 	case kMemorystatusKilledFCThrashing:
862 		str = "kMemorystatusKilledFCThrashing";
863 		break;
864 
865 	case kMemorystatusKilledPerProcessLimit:
866 		str = "kMemorystatusKilledPerProcessLimit";
867 		break;
868 
869 	case kMemorystatusKilledDiskSpaceShortage:
870 		str = "kMemorystatusKilledDiskSpaceShortage";
871 		break;
872 
873 	case kMemorystatusKilledIdleExit:
874 		str = "kMemorystatusKilledIdleExit";
875 		break;
876 
877 	case kMemorystatusKilledZoneMapExhaustion:
878 		str = "kMemorystatusKilledZoneMapExhaustion";
879 		break;
880 
881 	default:
882 		str = "unknown";
883 		break;
884 	}
885 
886 	return str;
887 }
888 #endif /* SK_LOG */
889 
890 bool
sk_sa_has_addr(struct sockaddr * sa)891 sk_sa_has_addr(struct sockaddr *sa)
892 {
893 	switch (sa->sa_family) {
894 	case AF_INET:
895 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
896 		return SIN(sa)->sin_addr.s_addr != INADDR_ANY;
897 	case AF_INET6:
898 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
899 		return !IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr);
900 	default:
901 		return false;
902 	}
903 }
904 
905 bool
sk_sa_has_port(struct sockaddr * sa)906 sk_sa_has_port(struct sockaddr *sa)
907 {
908 	switch (sa->sa_family) {
909 	case AF_INET:
910 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
911 		return SIN(sa)->sin_port != 0;
912 	case AF_INET6:
913 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
914 		return SIN6(sa)->sin6_port != 0;
915 	default:
916 		return false;
917 	}
918 }
919 
920 /* returns port number in host byte order */
921 uint16_t
sk_sa_get_port(struct sockaddr * sa)922 sk_sa_get_port(struct sockaddr *sa)
923 {
924 	switch (sa->sa_family) {
925 	case AF_INET:
926 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
927 		return ntohs(SIN(sa)->sin_port);
928 	case AF_INET6:
929 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
930 		return ntohs(SIN6(sa)->sin6_port);
931 	default:
932 		VERIFY(0);
933 		/* NOTREACHED */
934 		__builtin_unreachable();
935 	}
936 }
937 
938 void
skywalk_kill_process(struct proc * p,uint64_t reason_code)939 skywalk_kill_process(struct proc *p, uint64_t reason_code)
940 {
941 	os_reason_t exit_reason = OS_REASON_NULL;
942 
943 	VERIFY(p != kernproc);
944 
945 	exit_reason = os_reason_create(OS_REASON_SKYWALK, reason_code);
946 	if (exit_reason == OS_REASON_NULL) {
947 		SK_ERR("%s(%d) unable to allocate memory for crash reason "
948 		    "0x%llX", sk_proc_name_address(p), sk_proc_pid(p),
949 		    reason_code);
950 	} else {
951 		exit_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
952 		SK_ERR("%s(%d) aborted for reason 0x%llX",
953 		    sk_proc_name_address(p), sk_proc_pid(p), reason_code);
954 	}
955 
956 	psignal_try_thread_with_reason(p, current_thread(), SIGABRT,
957 	    exit_reason);
958 }
959 
960 #if (DEVELOPMENT || DEBUG)
/* Self-test buffer geometry: each comparison section is this many bytes. */
#define SK_MEMCMP_LEN 256               /* length of each section */
/* Widest mask exercised below (the 80-byte header/flow-key masks). */
#define SK_MASK_MAXLEN 80               /* maximum mask length */

/*
 * Cross-check the optimized sk_memcmp_mask_<l>B() routine and its
 * scalar variant against the reference implementation, comparing
 * hdr1/hdr2 under mask t##_m; "lr" is the byte count handed to the
 * reference compare.  Panics on any disagreement ("i" is the caller's
 * loop index, reported for diagnosis).
 */
#define SK_MEMCMP_MASK_VERIFY(t, l, lr) do {                            \
	_CASSERT(sizeof(t##_m) == SK_MASK_MAXLEN);                      \
	if ((sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) ^           \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l " at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) ^  \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l "_scalar at i=%d\n", i);       \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
981 
/*
 * Assert that hdr1 and hdr2 compare EQUAL under mask t##_m with both
 * the optimized sk_memcmp_mask_<l>B() routine and its scalar variant;
 * panics (with the caller's index "i" and the differing bytes) if not.
 */
#define SK_MEMCMP_MASK_MATCH_VERIFY(t, l) do {                          \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " mismatch (expected match) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar mismatch (expected match) at i=%d s1=0x%x" \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
998 
/*
 * Assert that hdr1 and hdr2 compare UNEQUAL under mask t##_m with both
 * the optimized sk_memcmp_mask_<l>B() routine and its scalar variant;
 * panics (with the caller's index "i" and the bytes involved) if they
 * unexpectedly match.
 */
#define SK_MEMCMP_MASK_MISMATCH_VERIFY(t, l) do {                       \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) == 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " match (expected mismatch) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) == 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar match (expected mismatch) at i=%d "        \
	            "s1=0x%x s2=0x%x\n", i, hdr1[i], hdr2[i]);          \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
1015 
/*
 * Cross-check the variable-length sk_memcmp_mask() routine and its
 * scalar variant against the reference implementation over the first
 * "i" bytes of hdr1/hdr2 under mask t##_m; panics on disagreement.
 */
#define SK_MEMCMP_BYTEMASK_VERIFY(t) do {                               \
	if ((sk_memcmp_mask(hdr1, hdr2, t##_m, i) != 0) ^               \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask at i=%d\n", i);                     \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_scalar(hdr1, hdr2, t##_m, i) != 0) ^        \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_scalar at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
1032 
/*
 * Reference (byte-at-a-time) masked compare: OR-accumulates the masked
 * XOR of every byte pair.  Returns 0 iff src1 and src2 agree on all
 * bits selected by byte_mask over the first n bytes; otherwise returns
 * the accumulated non-zero difference bits.
 */
static inline int
skywalk_memcmp_mask_ref(const uint8_t *src1, const uint8_t *src2,
    const uint8_t *byte_mask, size_t n)
{
	uint32_t diff = 0;
	size_t idx = 0;

	while (idx < n) {
		diff |= (uint32_t)((src1[idx] ^ src2[idx]) & byte_mask[idx]);
		idx++;
	}
	return (int)diff;
}
1043 
/*
 * Boot-time self-test for the masked-memcmp primitives: cross-checks
 * the optimized sk_memcmp_mask*() routines and their scalar variants
 * against skywalk_memcmp_mask_ref() using header-shaped masks, random
 * byte masks and targeted single-byte mismatches, and sanity-checks
 * the canonical flow-key (fk_mask_*) templates.  Any divergence panics
 * via the SK_MEMCMP_* verify macros.
 */
static void
skywalk_memcmp_mask_self_tests(void)
{
	/*
	 * Byte masks shaped like protocol headers / flow keys; each is
	 * SK_MASK_MAXLEN (80) bytes, the widest compare exercised below
	 * (SK_MEMCMP_MASK_VERIFY statically asserts this size).
	 */
	static const uint8_t ipv4_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xff, 0xff,
		0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t udp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_all_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t2_m[] = {
		0x0a, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t3_m[] = {
		0x0f, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t4_m[] = {
		0x2f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t5_m[] = {
		0x3f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i1_m[] = {
		0x02, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i2_m[] = {
		0x07, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i3_m[] = {
		0x17, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};

	/* validate flow entry mask (2-tuple) */
	_CASSERT(FKMASK_2TUPLE == (FKMASK_PROTO | FKMASK_SPORT));
	VERIFY(fk_mask_2tuple.fk_mask == FKMASK_2TUPLE);
	VERIFY(fk_mask_2tuple.fk_ipver == 0);
	VERIFY(fk_mask_2tuple.fk_proto == 0xff);
	VERIFY(fk_mask_2tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_2tuple.fk_dport == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_pad[0] == 0);

	/* 3-tuple: 2-tuple plus IP version and source address */
	_CASSERT(FKMASK_3TUPLE == (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_3tuple.fk_mask == FKMASK_3TUPLE);
	VERIFY(fk_mask_3tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_3tuple.fk_proto == 0xff);
	VERIFY(fk_mask_3tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_3tuple.fk_dport == 0);
	VERIFY(fk_mask_3tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_3tuple.fk_pad[0] == 0);

	/* 4-tuple: 3-tuple plus destination port */
	_CASSERT(FKMASK_4TUPLE == (FKMASK_3TUPLE | FKMASK_DPORT));
	VERIFY(fk_mask_4tuple.fk_mask == FKMASK_4TUPLE);
	VERIFY(fk_mask_4tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_4tuple.fk_proto == 0xff);
	VERIFY(fk_mask_4tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_4tuple.fk_pad[0] == 0);

	/* 5-tuple: 4-tuple plus destination address */
	_CASSERT(FKMASK_5TUPLE == (FKMASK_4TUPLE | FKMASK_DST));
	VERIFY(fk_mask_5tuple.fk_mask == FKMASK_5TUPLE);
	VERIFY(fk_mask_5tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_5tuple.fk_proto == 0xff);
	VERIFY(fk_mask_5tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_pad[0] == 0);

	/* IP-flow variants: proto only ... */
	_CASSERT(FKMASK_IPFLOW1 == FKMASK_PROTO);
	VERIFY(fk_mask_ipflow1.fk_mask == FKMASK_IPFLOW1);
	VERIFY(fk_mask_ipflow1.fk_ipver == 0);
	VERIFY(fk_mask_ipflow1.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow1.fk_sport == 0);
	VERIFY(fk_mask_ipflow1.fk_dport == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_pad[0] == 0);

	/* ... plus IP version and source address ... */
	_CASSERT(FKMASK_IPFLOW2 == (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_ipflow2.fk_mask == FKMASK_IPFLOW2);
	VERIFY(fk_mask_ipflow2.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow2.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow2.fk_sport == 0);
	VERIFY(fk_mask_ipflow2.fk_dport == 0);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow2.fk_pad[0] == 0);

	/* ... plus destination address */
	_CASSERT(FKMASK_IPFLOW3 == (FKMASK_IPFLOW2 | FKMASK_DST));
	VERIFY(fk_mask_ipflow3.fk_mask == FKMASK_IPFLOW3);
	VERIFY(fk_mask_ipflow3.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow3.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow3.fk_sport == 0);
	VERIFY(fk_mask_ipflow3.fk_dport == 0);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_pad[0] == 0);

	VERIFY(sk_dump_buf != NULL);

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is SK_MEMCMP_LEN-bytes long,
	 * which means we need at least 16+(3*SK_MEMCMP_LEN) bytes.
	 *
	 * 1st section is s1 -> (hdr1 aligned to 16-bytes)
	 * 2nd section is s2 -> (hdr2 = hdr1 + SK_MEMCMP_LEN)
	 * 3rd section is s3 -> (mask = hdr2 + SK_MEMCMP_LEN)
	 */
	void *s1, *s2, *s3;

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		s1 = (void *)P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((uintptr_t)s1 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((uintptr_t)s2 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	uint8_t *hdr1 = s1;
	uint8_t *hdr2 = s2;
	uint8_t *byte_m = s3;

	/* fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	kprintf("Skywalk: memcmp mask ... ");

	/*
	 * Moving single-byte mismatch: at iteration i, bytes 0..i-1 of
	 * hdr1/hdr2 are both 1 (set in prior iterations), byte i differs
	 * (hdr1[i]=1 before the checks, hdr2[i]=1 only after them).
	 */
	int i;
	for (i = 0; i < 80; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_MASK_VERIFY(ipv4, 32, 20);
		SK_MEMCMP_MASK_VERIFY(ipv6, 64, 40);
		SK_MEMCMP_MASK_VERIFY(ipv6_tcp, 80, 64);
		SK_MEMCMP_MASK_VERIFY(tcp, 32, 24);
		SK_MEMCMP_MASK_VERIFY(udp, 16, 6);
		SK_MEMCMP_MASK_VERIFY(fk_all, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t3, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t4, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t5, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i1, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i3, 48, 48);
		hdr2[i] = 1;
	}

	bzero(hdr1, SK_MEMCMP_LEN);
	bzero(hdr2, SK_MEMCMP_LEN);

	/* re-fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	/* same moving-mismatch pattern, variable-length compare */
	for (i = 0; i < SK_MEMCMP_LEN; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_BYTEMASK_VERIFY(byte);
		hdr2[i] = 1;
	}

	/* fill hdr1 and hdr2 with random data */
	read_frandom(hdr1, SK_MEMCMP_LEN);
	bcopy(hdr1, hdr2, SK_MEMCMP_LEN);
	memset(byte_m, 0xff, SK_MEMCMP_LEN);

	/*
	 * Force a mismatch at byte i only: the fixed-length compares
	 * whose span covers i must report mismatch, the shorter ones
	 * must still match; masking byte i out must restore a match.
	 * Original hdr2/byte_m values are put back at loop bottom.
	 */
	for (i = 0; i < 80; i++) {
		uint8_t val = hdr2[i];
		uint8_t mval = byte_m[i];

		while (hdr2[i] == hdr1[i] || hdr2[i] == 0) {
			uint8_t newval;
			read_frandom(&newval, sizeof(newval));
			hdr2[i] = newval;
		}
		if (i < 16) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 80);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		}
		byte_m[i] = 0;
		if (i < 16) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 80);
		}
		hdr2[i] = val;
		byte_m[i] = mval;
	}

	kprintf("PASSED\n");
}
1409 
1410 #define SK_COPY_LEN     128             /* length of each section */
1411 
1412 #define SK_COPY_PREPARE(t) do {                                         \
1413 	bzero(s2, SK_COPY_LEN);                                         \
1414 	bzero(s3, SK_COPY_LEN);                                         \
1415 	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
1416 	kprintf("Skywalk: " #t " ... ");                                \
1417 } while (0)
1418 
1419 #define SK_COPY_VERIFY(t) do {                                          \
1420 	if (_s1 != s1 || _s2 != s2 || _s3 != s3) {                      \
1421 	        panic_plain("\ninput registers clobbered: " #t "\n");   \
1422 	/* NOTREACHED */                                        \
1423 	        __builtin_unreachable();                                \
1424 	}                                                               \
1425 	if (bcmp(s2, s3, SK_COPY_LEN) != 0) {                           \
1426 	        panic_plain("\nbroken: " #t "\n");                      \
1427 	/* NOTREACHED */                                        \
1428 	        __builtin_unreachable();                                \
1429 	} else {                                                        \
1430 	        kprintf("PASSED\n");                                    \
1431 	}                                                               \
1432 } while (0)
1433 
1434 #define SK_ZERO_PREPARE(t) do {                                         \
1435 	bcopy(s1, s2, SK_COPY_LEN);                                     \
1436 	bcopy(s1, s3, SK_COPY_LEN);                                     \
1437 	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
1438 	kprintf("Skywalk: " #t " ... ");                                \
1439 } while (0)
1440 
1441 #define SK_ZERO_VERIFY(t)       SK_COPY_VERIFY(t)
1442 
/*
 * Boot-time self-tests: validate every sk_copy*() variant against
 * bcopy() and every sk_zero*() variant against bzero() over random
 * data held in sk_dump_buf, then run the masked-memcmp tests, and
 * statically pin the kdebug (ariadne) trace codes.  Panics via the
 * SK_COPY/SK_ZERO verify macros on any mismatch.
 */
static void
skywalk_self_tests(void)
{
	void *s1, *s2, *s3;
	void *_s1, *_s2, *_s3;

	VERIFY(sk_dump_buf != NULL);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is 128-bytes long, which means we
	 * need at least 16+(3*128) bytes.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bcopy;
	 * 3rd section is test target based on our stuff.
	 */
	_CASSERT(SK_COPY_LEN != 0 && (SK_COPY_LEN % 128) == 0);
	_CASSERT((SK_COPY_LEN % 16) == 0);
	_CASSERT((SK_DUMP_BUF_ALIGN % 16) == 0);
	_CASSERT(SK_DUMP_BUF_SIZE >= (SK_DUMP_BUF_ALIGN + (SK_COPY_LEN * 3)));

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		s1 = (void *)P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((uintptr_t)s1 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((uintptr_t)s2 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	/* fill s1 with random data */
	read_frandom(s1, SK_COPY_LEN);

	kprintf("Skywalk: running self-tests\n");

	/* Copy 8-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8);
	bcopy(s1, s2, 8);
	sk_copy64_8((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_8);

	/* Copy 8-bytes, 32-bit aligned (offset by 4 to misalign from 8) */
	SK_COPY_PREPARE(sk_copy32_8);
	bcopy((void *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (void *)((uintptr_t)s2 + sizeof(uint32_t)), 8);
	sk_copy32_8((uint32_t *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (uint32_t *)((uintptr_t)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_8);

	/* Copy 16-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_16);
	bcopy(s1, s2, 16);
	sk_copy64_16((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_16);

	/* Copy 16-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_16);
	bcopy((void *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (void *)((uintptr_t)s2 + sizeof(uint32_t)), 16);
	sk_copy32_16((uint32_t *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (uint32_t *)((uintptr_t)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_16);

	/* Copy 20-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_20);
	bcopy(s1, s2, 20);
	sk_copy64_20((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_20);

	/* Copy 24-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_24);
	bcopy(s1, s2, 24);
	sk_copy64_24((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_24);

	/* Copy 32-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32);
	bcopy(s1, s2, 32);
	sk_copy64_32((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_32);

	/* Copy 32-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_32);
	bcopy((void *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (void *)((uintptr_t)s2 + sizeof(uint32_t)), 32);
	sk_copy32_32((uint32_t *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (uint32_t *)((uintptr_t)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_32);

	/* Copy 40-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_40);
	bcopy(s1, s2, 40);
	sk_copy64_40((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_40);

	/* Copy entire section in 64-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_64x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_64x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_64x);

	/* Copy entire section in 32-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_32x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_32x);

	/* Copy entire section in 8-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_8x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_8x);

	/* Copy entire section in 4-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_4x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_4x((uint32_t *)s1, (uint32_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_4x);

	/*
	 * Re-use sk_dump_buf for testing sk_zero, same principle as above.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bzero;
	 * 3rd section is test target based on our stuff.
	 */
	SK_ZERO_PREPARE(sk_zero_16);
	bzero(s2, 16);
	sk_zero_16(s3);
	SK_ZERO_VERIFY(sk_zero_16);

	SK_ZERO_PREPARE(sk_zero_32);
	bzero(s2, 32);
	sk_zero_32(s3);
	SK_ZERO_VERIFY(sk_zero_32);

	SK_ZERO_PREPARE(sk_zero_48);
	bzero(s2, 48);
	sk_zero_48(s3);
	SK_ZERO_VERIFY(sk_zero_48);

	SK_ZERO_PREPARE(sk_zero_128);
	bzero(s2, 128);
	sk_zero_128(s3);
	SK_ZERO_VERIFY(sk_zero_128);

	/* Perform memcmp with mask self tests */
	skywalk_memcmp_mask_self_tests();

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/* Keep packet trace code in sync with ariadne plist */
	_CASSERT(SK_KTRACE_AON_IF_STATS == 0x8100004);

	_CASSERT(SK_KTRACE_FSW_DEV_RING_FLUSH == 0x8110004);
	_CASSERT(SK_KTRACE_FSW_USER_RING_FLUSH == 0x8110008);
	_CASSERT(SK_KTRACE_FSW_FLOW_TRACK_RTT == 0x8110010);

	_CASSERT(SK_KTRACE_NETIF_RING_TX_REFILL == 0x8120004);
	_CASSERT(SK_KTRACE_NETIF_HOST_ENQUEUE == 0x8120008);
	_CASSERT(SK_KTRACE_NETIF_MIT_RX_INTR == 0x812000c);
	_CASSERT(SK_KTRACE_NETIF_COMMON_INTR == 0x8120010);
	_CASSERT(SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT == 0x8120014);
	_CASSERT(SK_KTRACE_NETIF_RX_NOTIFY_FAST == 0x8120018);

	_CASSERT(SK_KTRACE_CHANNEL_TX_REFILL == 0x8130004);

	_CASSERT(SK_KTRACE_PKT_RX_DRV == 0x8140004);
	_CASSERT(SK_KTRACE_PKT_RX_FSW == 0x8140008);
	_CASSERT(SK_KTRACE_PKT_RX_CHN == 0x814000c);
	_CASSERT(SK_KTRACE_PKT_TX_FSW == 0x8140040);
	_CASSERT(SK_KTRACE_PKT_TX_AQM == 0x8140044);
	_CASSERT(SK_KTRACE_PKT_TX_DRV == 0x8140048);
}
1621 #endif /* DEVELOPMENT || DEBUG */
1622