xref: /xnu-12377.61.12/bsd/skywalk/core/skywalk.c (revision 4d495c6e23c53686cf65f45067f79024cf5dcee8)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <pexpert/pexpert.h>    /* for PE_parse_boot_argn */
31 #include <sys/codesign.h>       /* for csproc_get_platform_binary */
32 #include <sys/reason.h>
33 #include <netinet/inp_log.h>
34 #if CONFIG_MACF
35 #include <security/mac_framework.h>
36 #endif /* CONFIG_MACF */
37 
38 #ifndef htole16
39 #if BYTE_ORDER == LITTLE_ENDIAN
40 #define htole16(x)      ((uint16_t)(x))
41 #else /* BYTE_ORDER != LITTLE_ENDIAN */
42 #define htole16(x)      bswap16((x))
43 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
44 #endif /* htole16 */
45 
46 LCK_GRP_DECLARE(sk_lock_group, "sk_lock");
47 LCK_ATTR_DECLARE(sk_lock_attr, 0, 0);
48 LCK_MTX_DECLARE_ATTR(sk_lock, &sk_lock_group, &sk_lock_attr);
49 
50 static void skywalk_fini(void);
51 static int sk_priv_chk(proc_t, kauth_cred_t, int);
52 
53 static int __sk_inited = 0;
54 uint64_t sk_verbose;
55 
56 #if (DEVELOPMENT || DEBUG)
57 size_t sk_copy_thres = SK_COPY_THRES;
58 #endif /* DEVELOPMENT || DEBUG */
59 uint64_t sk_features =
60 #if SKYWALK
61     SK_FEATURE_SKYWALK |
62 #endif
63 #if DEVELOPMENT
64     SK_FEATURE_DEVELOPMENT |
65 #endif
66 #if DEBUG
67     SK_FEATURE_DEBUG |
68 #endif
69 #if CONFIG_NEXUS_FLOWSWITCH
70     SK_FEATURE_NEXUS_FLOWSWITCH |
71 #endif
72 #if CONFIG_NEXUS_NETIF
73     SK_FEATURE_NEXUS_NETIF |
74 #endif
75 #if CONFIG_NEXUS_USER_PIPE
76     SK_FEATURE_NEXUS_USER_PIPE |
77 #endif
78 #if CONFIG_NEXUS_KERNEL_PIPE
79     SK_FEATURE_NEXUS_KERNEL_PIPE |
80 #endif
81 #if CONFIG_NEXUS_KERNEL_PIPE && (DEVELOPMENT || DEBUG)
82     SK_FEATURE_NEXUS_KERNEL_PIPE_LOOPBACK |
83 #endif
84 #if (DEVELOPMENT || DEBUG)
85     SK_FEATURE_DEV_OR_DEBUG |
86 #endif
87     0;
88 
89 uint32_t sk_opp_defunct = 0;    /* opportunistic defunct */
90 
91 /* checksum offload is generic to any nexus (not specific to flowswitch) */
92 uint32_t sk_cksum_tx = 1;       /* advertise outbound offload */
93 uint32_t sk_cksum_rx = 1;       /* perform inbound checksum offload */
94 
95 /* guard pages */
96 uint32_t sk_guard = 0;          /* guard pages (0: disable) */
97 #define SK_GUARD_MIN    1       /* minimum # of guard pages */
98 #define SK_GUARD_MAX    4       /* maximum # of guard pages */
99 uint32_t sk_headguard_sz = SK_GUARD_MIN; /* # of leading guard pages */
100 uint32_t sk_tailguard_sz = SK_GUARD_MIN; /* # of trailing guard pages */
101 
#if (DEVELOPMENT || DEBUG)
/* ring-size / pool-size overrides; 0 means "use the built-in default" */
uint32_t sk_txring_sz = 0;      /* flowswitch */
uint32_t sk_rxring_sz = 0;      /* flowswitch */
uint32_t sk_net_txring_sz = 0;  /* netif adapter */
uint32_t sk_net_rxring_sz = 0;  /* netif adapter */
uint32_t sk_min_pool_size = 0;  /* minimum packet pool size */
#endif /* DEVELOPMENT || DEBUG */
109 
110 uint32_t sk_max_flows = NX_FLOWADV_DEFAULT;
111 uint32_t sk_fadv_nchunks;       /* # of FO_FLOWADV_CHUNK in bitmap */
112 uint32_t sk_netif_compat_txmodel = NETIF_COMPAT_TXMODEL_DEFAULT;
113 uint32_t sk_netif_native_txmodel = NETIF_NATIVE_TXMODEL_DEFAULT;
114 /*
115  * Configures the RX aggregation logic for TCP in flowswitch.
116  * A non-zero value enables the aggregation logic, with the maximum
117  * aggregation length (in bytes) limited to this value.
118  *
119  * DO NOT increase beyond 16KB. If you do, we end up corrupting the data-stream
120  * as we create aggregate-mbufs with a pktlen > 16KB but only a single element.
121  */
122 uint32_t sk_fsw_rx_agg_tcp = 16384;
123 
124 /*
125  * Forces the RX host path to use or not use aggregation, regardless of the
126  * existence of filters (see sk_fsw_rx_agg_tcp_host_t for valid values).
127  */
128 uint32_t sk_fsw_rx_agg_tcp_host = SK_FSW_RX_AGG_TCP_HOST_AUTO;
129 
130 /*
131  * Configures the skywalk infrastructure for handling TCP TX aggregation.
132  * A non-zero value enables the support.
133  */
134 uint32_t sk_fsw_tx_agg_tcp = 1;
135 /*
136  * Configuration to limit the number of buffers for flowswitch VP channel.
137  */
138 uint32_t sk_fsw_max_bufs = 0;
139 /*
140  * GSO MTU for the channel path:
141  *   > 0: enable GSO and use value as the largest supported segment size
142  *  == 0: disable GSO
143  */
144 uint32_t sk_fsw_gso_mtu = 16 * 1024;
145 
146 /* list of interfaces that allow direct open from userspace */
147 #define SK_NETIF_DIRECT_MAX     8
148 char sk_netif_direct[SK_NETIF_DIRECT_MAX][IFXNAMSIZ];
149 uint32_t sk_netif_direct_cnt = 0;
150 
151 uint16_t sk_tx_delay_qlen = 16;                 /* packets */
152 uint16_t sk_tx_delay_timeout = (1 * 1000);        /* microseconds */
153 
154 #define SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ     64
155 #define SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ     64
156 uint32_t sk_netif_compat_aux_cell_tx_ring_sz =
157     SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ;
158 uint32_t sk_netif_compat_aux_cell_rx_ring_sz =
159     SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ;
160 
161 /* Wi-Fi Access Point */
162 #define SK_NETIF_COMPAT_WAP_TX_RING_SZ  128
163 #define SK_NETIF_COMPAT_WAP_RX_RING_SZ  128
164 uint32_t sk_netif_compat_wap_tx_ring_sz = SK_NETIF_COMPAT_WAP_TX_RING_SZ;
165 uint32_t sk_netif_compat_wap_rx_ring_sz = SK_NETIF_COMPAT_WAP_RX_RING_SZ;
166 
167 /* AWDL */
168 #define SK_NETIF_COMPAT_AWDL_TX_RING_SZ 128
169 #define SK_NETIF_COMPAT_AWDL_RX_RING_SZ 128
170 uint32_t sk_netif_compat_awdl_tx_ring_sz = SK_NETIF_COMPAT_AWDL_TX_RING_SZ;
171 uint32_t sk_netif_compat_awdl_rx_ring_sz = SK_NETIF_COMPAT_AWDL_RX_RING_SZ;
172 
173 /* Wi-Fi Infrastructure */
174 #define SK_NETIF_COMPAT_WIF_TX_RING_SZ  128
175 #define SK_NETIF_COMPAT_WIF_RX_RING_SZ  128
176 uint32_t sk_netif_compat_wif_tx_ring_sz = SK_NETIF_COMPAT_WIF_TX_RING_SZ;
177 uint32_t sk_netif_compat_wif_rx_ring_sz = SK_NETIF_COMPAT_WIF_RX_RING_SZ;
178 
179 #define SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ      128
180 #define SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ      128
181 uint32_t sk_netif_compat_usb_eth_tx_ring_sz =
182     SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ;
183 uint32_t sk_netif_compat_usb_eth_rx_ring_sz =
184     SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ;
185 
186 #define SK_NETIF_COMPAT_RX_MBQ_LIMIT    8192
187 int sk_netif_compat_rx_mbq_limit = SK_NETIF_COMPAT_RX_MBQ_LIMIT;
188 
189 uint32_t sk_netif_tx_mit = SK_NETIF_MIT_AUTO;
190 uint32_t sk_netif_rx_mit = SK_NETIF_MIT_AUTO;
191 char sk_ll_prefix[IFNAMSIZ] = "llw";
192 uint32_t sk_channel_buflet_alloc = 0;
193 uint32_t sk_netif_queue_stat_enable = 0;
194 
195 SYSCTL_NODE(_kern, OID_AUTO, skywalk, CTLFLAG_RW | CTLFLAG_LOCKED,
196     0, "Skywalk parameters");
197 SYSCTL_NODE(_kern_skywalk, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_LOCKED,
198     0, "Skywalk statistics");
199 
200 SYSCTL_OPAQUE(_kern_skywalk, OID_AUTO, features, CTLFLAG_RD | CTLFLAG_LOCKED,
201     &sk_features, sizeof(sk_features), "-", "Skywalk features");
202 
203 SYSCTL_QUAD(_kern_skywalk, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
204     &sk_verbose, "Skywalk verbose mode");
205 
206 #if (DEVELOPMENT || DEBUG)
207 SYSCTL_LONG(_kern_skywalk, OID_AUTO, sk_copy_thres, CTLFLAG_RW | CTLFLAG_LOCKED,
208     &sk_copy_thres, "Skywalk copy threshold");
209 static int __priv_check = 1;
210 SYSCTL_INT(_kern_skywalk, OID_AUTO, priv_check, CTLFLAG_RW | CTLFLAG_LOCKED,
211     &__priv_check, 0, "Skywalk privilege check");
212 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_opp_defunct, CTLFLAG_RW | CTLFLAG_LOCKED,
213     &sk_opp_defunct, 0, "Defunct opportunistically");
214 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_tx, CTLFLAG_RW | CTLFLAG_LOCKED,
215     &sk_cksum_tx, 0, "Advertise (and perform) outbound checksum offload");
216 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_rx, CTLFLAG_RW | CTLFLAG_LOCKED,
217     &sk_cksum_rx, 0, "Perform inbound checksum offload");
218 SYSCTL_UINT(_kern_skywalk, OID_AUTO, chan_buf_alloc,
219     CTLFLAG_RW | CTLFLAG_LOCKED, &sk_channel_buflet_alloc, 0,
220     "channel buflet allocation (enable/disable)");
221 
222 uint32_t sk_inject_error_rmask = 0x3;
223 SYSCTL_UINT(_kern_skywalk, OID_AUTO, inject_error_rmask,
224     CTLFLAG_RW | CTLFLAG_LOCKED, &sk_inject_error_rmask, 0x3, "");
225 
226 static void skywalk_self_tests(void);
227 #endif /* (DEVELOPMENT || DEBUG) */
228 
229 #define SKMEM_TAG_SYSCTL_BUF "com.apple.skywalk.sysctl_buf"
230 SKMEM_TAG_DEFINE(skmem_tag_sysctl_buf, SKMEM_TAG_SYSCTL_BUF);
231 
232 #define SKMEM_TAG_OID       "com.apple.skywalk.skoid"
233 SKMEM_TAG_DEFINE(skmem_tag_oid, SKMEM_TAG_OID);
234 
235 #if (SK_LOG || DEVELOPMENT || DEBUG)
236 #define SKMEM_TAG_DUMP  "com.apple.skywalk.dump"
237 static SKMEM_TAG_DEFINE(skmem_tag_dump, SKMEM_TAG_DUMP);
238 
239 static uint32_t sk_dump_buf_size;
240 static char *__sized_by(sk_dump_buf_size) sk_dump_buf;
241 #define SK_DUMP_BUF_ALIGN       16
242 #endif /* (SK_LOG || DEVELOPMENT || DEBUG) */
243 
244 os_log_t sk_log_handle;
245 
__startup_func
void
__sk_tag_make(const struct sk_tag_spec *spec)
{
	/*
	 * Allocate a kernel allocation-name handle for this tag spec and
	 * publish it through the spec's variable pointer.  Runs during
	 * startup (__startup_func); invoked for each SKMEM_TAG_DEFINE().
	 */
	*spec->skt_var = kern_allocation_name_allocate(spec->skt_name, 0);
}
252 
253 boolean_t
skywalk_netif_direct_enabled(void)254 skywalk_netif_direct_enabled(void)
255 {
256 	return sk_netif_direct_cnt > 0;
257 }
258 
259 boolean_t
skywalk_netif_direct_allowed(const char * ifname)260 skywalk_netif_direct_allowed(const char *ifname)
261 {
262 	uint32_t i;
263 
264 	for (i = 0; i < sk_netif_direct_cnt; i++) {
265 		if (strlcmp(sk_netif_direct[i], ifname, IFXNAMSIZ) == 0) {
266 			return TRUE;
267 		}
268 	}
269 	return FALSE;
270 }
271 
#if (DEVELOPMENT || DEBUG)
/*
 * Parse the "sk_netif_direct" boot-arg: a comma-separated list of
 * interface names (at most SK_NETIF_DIRECT_MAX entries, each shorter
 * than IFXNAMSIZ) allowed to be opened directly from userspace.
 * Accepted names are stored into sk_netif_direct[] and counted in
 * sk_netif_direct_cnt.
 */
static void
parse_netif_direct(void)
{
	char buf[(IFXNAMSIZ + 1) * SK_NETIF_DIRECT_MAX];
	size_t i, curr, len, iflen;

	if (!PE_parse_boot_arg_str("sk_netif_direct", buf, sizeof(buf))) {
		return;
	}

	curr = 0;
	len = strbuflen(buf);
	/* scan one past the end so the final (unterminated) token is flushed */
	for (i = 0; i < len + 1 &&
	    sk_netif_direct_cnt < SK_NETIF_DIRECT_MAX; i++) {
		if (buf[i] != ',' && buf[i] != '\0') {
			continue;
		}

		/* terminate the current token in place */
		buf[i] = '\0';
		iflen = i - curr;
		/* skip empty tokens and names too long to fit IFXNAMSIZ */
		if (iflen > 0 && iflen < IFXNAMSIZ) {
			(void) strbufcpy(sk_netif_direct[sk_netif_direct_cnt],
			    IFXNAMSIZ, buf + curr, IFXNAMSIZ);
			sk_netif_direct_cnt++;
		}
		curr = i + 1;
	}
}
#endif /* DEVELOPMENT || DEBUG */
302 
/*
 * Tear down Skywalk subsystems, in reverse order of their setup in
 * skywalk_init().  Must be called with sk_lock held; within this file
 * it is reached only from the skywalk_init() error path.
 */
static void
skywalk_fini(void)
{
	SK_LOCK_ASSERT_HELD();

	if (__sk_inited) {
#if (DEVELOPMENT || DEBUG)
		skmem_test_fini();
		cht_test_fini();
#endif /* (DEVELOPMENT || DEBUG) */
		channel_fini();
		nexus_fini();
		skmem_fini();
		flowidns_fini();

#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* release the shared dump buffer, if it was ever allocated */
		if (sk_dump_buf != NULL) {
			sk_free_data_sized_by(sk_dump_buf, sk_dump_buf_size);
			sk_dump_buf = NULL;
			sk_dump_buf_size = 0;
		}
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		__sk_inited = 0;
	}
}
329 
/*
 * One-time Skywalk initialization: create the log handle, absorb
 * DEVELOPMENT/DEBUG boot-arg overrides, sanitize tunables (flow count,
 * mitigation modes, guard-page counts), then bring up the memory,
 * nexus and channel subsystems under sk_lock.  On success the
 * namespaces (netns/protons/flowidns) and self-tests are started;
 * on failure everything already set up is torn down via skywalk_fini().
 * Returns 0 or the error from nexus_init()/channel_init().
 */
int
skywalk_init(void)
{
	int error;

	VERIFY(!__sk_inited);

	/* kern_packet_t and bitmap_t must both be exactly 64-bit wide */
	static_assert(sizeof(kern_packet_t) == sizeof(uint64_t));
	static_assert(sizeof(bitmap_t) == sizeof(uint64_t));

	sk_log_handle = os_log_create("com.apple.xnu", "skywalk");

#if (DEVELOPMENT || DEBUG)
	/* tunable overrides via boot-args (DEVELOPMENT/DEBUG kernels only) */
	PE_parse_boot_argn("sk_verbose", &sk_verbose, sizeof(sk_verbose));
	(void) PE_parse_boot_argn("sk_opp_defunct", &sk_opp_defunct,
	    sizeof(sk_opp_defunct));
	(void) PE_parse_boot_argn("sk_cksum_tx", &sk_cksum_tx,
	    sizeof(sk_cksum_tx));
	(void) PE_parse_boot_argn("sk_cksum_rx", &sk_cksum_rx,
	    sizeof(sk_cksum_rx));
	(void) PE_parse_boot_argn("sk_txring_sz", &sk_txring_sz,
	    sizeof(sk_txring_sz));
	(void) PE_parse_boot_argn("sk_rxring_sz", &sk_rxring_sz,
	    sizeof(sk_rxring_sz));
	(void) PE_parse_boot_argn("sk_net_txring_sz", &sk_net_txring_sz,
	    sizeof(sk_net_txring_sz));
	(void) PE_parse_boot_argn("sk_net_rxring_sz", &sk_net_rxring_sz,
	    sizeof(sk_net_rxring_sz));
	(void) PE_parse_boot_argn("sk_max_flows", &sk_max_flows,
	    sizeof(sk_max_flows));
	(void) PE_parse_boot_argn("sk_native_txmodel", &sk_netif_native_txmodel,
	    sizeof(sk_netif_native_txmodel));
	(void) PE_parse_boot_argn("sk_compat_txmodel", &sk_netif_compat_txmodel,
	    sizeof(sk_netif_compat_txmodel));
	(void) PE_parse_boot_argn("sk_tx_delay_qlen", &sk_tx_delay_qlen,
	    sizeof(sk_tx_delay_qlen));
	/*
	 * NOTE(review): boot-arg name "sk_ts_delay_timeout" does not match
	 * the variable it sets (sk_tx_delay_timeout) -- presumably a
	 * historical typo; renaming it would break existing boot-args, so
	 * it is left as-is.
	 */
	(void) PE_parse_boot_argn("sk_ts_delay_timeout", &sk_tx_delay_timeout,
	    sizeof(sk_tx_delay_timeout));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_tx_ring_sz",
	    &sk_netif_compat_aux_cell_tx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_rx_ring_sz",
	    &sk_netif_compat_aux_cell_rx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_tx_ring_sz",
	    &sk_netif_compat_wap_tx_ring_sz,
	    sizeof(sk_netif_compat_wap_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_rx_ring_sz",
	    &sk_netif_compat_wap_rx_ring_sz,
	    sizeof(sk_netif_compat_wap_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_tx_ring_sz",
	    &sk_netif_compat_awdl_tx_ring_sz,
	    sizeof(sk_netif_compat_awdl_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_rx_ring_sz",
	    &sk_netif_compat_awdl_rx_ring_sz,
	    sizeof(sk_netif_compat_awdl_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_tx_ring_sz",
	    &sk_netif_compat_wif_tx_ring_sz,
	    sizeof(sk_netif_compat_wif_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_rx_ring_sz",
	    &sk_netif_compat_wif_rx_ring_sz,
	    sizeof(sk_netif_compat_wif_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_tx_ring_sz",
	    &sk_netif_compat_usb_eth_tx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_rx_ring_sz",
	    &sk_netif_compat_usb_eth_rx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_rx_mbq_limit",
	    &sk_netif_compat_rx_mbq_limit, sizeof(sk_netif_compat_rx_mbq_limit));
	(void) PE_parse_boot_argn("sk_netif_tx_mit",
	    &sk_netif_tx_mit, sizeof(sk_netif_tx_mit));
	(void) PE_parse_boot_argn("sk_netif_rx_mit",
	    &sk_netif_rx_mit, sizeof(sk_netif_rx_mit));
	(void) PE_parse_boot_arg_str("sk_ll_prefix", sk_ll_prefix,
	    sizeof(sk_ll_prefix));
	(void) PE_parse_boot_argn("sk_netif_q_stats", &sk_netif_queue_stat_enable,
	    sizeof(sk_netif_queue_stat_enable));
	parse_netif_direct();
	(void) PE_parse_boot_argn("sk_fsw_rx_agg_tcp", &sk_fsw_rx_agg_tcp,
	    sizeof(sk_fsw_rx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_tx_agg_tcp", &sk_fsw_tx_agg_tcp,
	    sizeof(sk_fsw_tx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_gso_mtu", &sk_fsw_gso_mtu,
	    sizeof(sk_fsw_gso_mtu));
	(void) PE_parse_boot_argn("sk_fsw_max_bufs", &sk_fsw_max_bufs,
	    sizeof(sk_fsw_max_bufs));
	(void) PE_parse_boot_argn("sk_chan_buf_alloc", &sk_channel_buflet_alloc,
	    sizeof(sk_channel_buflet_alloc));
	(void) PE_parse_boot_argn("sk_guard", &sk_guard, sizeof(sk_guard));
	(void) PE_parse_boot_argn("sk_headguard_sz", &sk_headguard_sz,
	    sizeof(sk_headguard_sz));
	(void) PE_parse_boot_argn("sk_tailguard_sz", &sk_tailguard_sz,
	    sizeof(sk_tailguard_sz));
	(void) PE_parse_boot_argn("sk_min_pool_size", &sk_min_pool_size,
	    sizeof(sk_min_pool_size));
#endif /* DEVELOPMENT || DEBUG */

	/* clamp flow-advisory table size to [NX_FLOWADV_DEFAULT, NX_FLOWADV_MAX] */
	if (sk_max_flows == 0) {
		sk_max_flows = NX_FLOWADV_DEFAULT;
	} else if (sk_max_flows > NX_FLOWADV_MAX) {
		sk_max_flows = NX_FLOWADV_MAX;
	}

	/* clamp mitigation modes to their maximum valid value */
	if (sk_netif_tx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_tx_mit = SK_NETIF_MIT_MAX;
	}
	if (sk_netif_rx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_rx_mit = SK_NETIF_MIT_MAX;
	}

	/* number of FO_FLOWADV_CHUNK-sized words needed to cover sk_max_flows */
	sk_fadv_nchunks = (uint32_t)P2ROUNDUP(sk_max_flows, FO_FLOWADV_CHUNK) /
	    FO_FLOWADV_CHUNK;

	/*
	 * Guard-page configuration: when enabled, a size of 0 requests a
	 * randomized count; the result is always clamped to
	 * [SK_GUARD_MIN, SK_GUARD_MAX].  When disabled, both head and
	 * tail use the minimum.
	 */
	if (sk_guard) {
		uint32_t sz;
		/* leading guard page(s) */
		if (sk_headguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_headguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_headguard_sz > SK_GUARD_MAX) {
			sk_headguard_sz = SK_GUARD_MAX;
		}
		if (sk_headguard_sz < SK_GUARD_MIN) {
			sk_headguard_sz = SK_GUARD_MIN;
		}
		/* trailing guard page(s) */
		if (sk_tailguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_tailguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_tailguard_sz > SK_GUARD_MAX) {
			sk_tailguard_sz = SK_GUARD_MAX;
		}
		if (sk_tailguard_sz < SK_GUARD_MIN) {
			sk_tailguard_sz = SK_GUARD_MIN;
		}
	} else {
		sk_headguard_sz = sk_tailguard_sz = SK_GUARD_MIN;
	}
	ASSERT(sk_headguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_headguard_sz <= SK_GUARD_MAX);
	ASSERT(sk_tailguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_tailguard_sz <= SK_GUARD_MAX);

	__sk_inited = 1;

	/* bring up the core subsystems; undo everything on any failure */
	SK_LOCK();
	skmem_init();
	error = nexus_init();
	if (error == 0) {
		error = channel_init();
	}
	if (error != 0) {
		skywalk_fini();
	}
	SK_UNLOCK();

	if (error == 0) {
#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* allocate space for sk_dump_buf */
		sk_dump_buf = sk_alloc_data(SK_DUMP_BUF_SIZE, Z_WAITOK | Z_NOFAIL,
		    skmem_tag_dump);
		sk_dump_buf_size = SK_DUMP_BUF_SIZE;
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		netns_init();
		protons_init();
		flowidns_init();

#if (DEVELOPMENT || DEBUG)
		skywalk_self_tests();
		skmem_test_init();
		cht_test_init();
#endif /* DEVELOPMENT || DEBUG */
	}

	return error;
}
508 
509 /*
510  * csproc_get_platform_binary() returns non-zero if the process is platform
511  * code, which means that it is considered part of the Operating System.
512  * On iOS, that means it's contained in the trust cache or a loaded one.
513  * On macOS, everything signed by B&I is currently platform code, but the
514  * policy in general is subject to change.  Thus this is an approximate.
515  */
516 boolean_t
skywalk_check_platform_binary(proc_t p)517 skywalk_check_platform_binary(proc_t p)
518 {
519 	return (csproc_get_platform_binary(p) == 0) ? FALSE : TRUE;
520 }
521 
/*
 * Check a single Skywalk privilege against the given credential via
 * priv_check_cred().  Returns 0 if granted, otherwise EPERM (or the
 * error from priv_check_cred()).  On DEVELOPMENT/DEBUG kernels a
 * failure is logged, and may be ignored entirely when __priv_check
 * (the kern.skywalk.priv_check sysctl) is cleared.
 */
static int
sk_priv_chk(proc_t p, kauth_cred_t cred, int priv)
{
#pragma unused(p)
	int ret = EPERM;

	if (cred != NULL) {
		ret = priv_check_cred(cred, priv, 0);
	}
#if (DEVELOPMENT || DEBUG)
	if (ret != 0) {
		const char *pstr;

		/* map the privilege code to its entitlement string, for logging */
		switch (priv) {
		case PRIV_SKYWALK_REGISTER_USER_PIPE:
			pstr = "com.apple.private.skywalk.register-user-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_KERNEL_PIPE:
			pstr = "com.apple.private.skywalk.register-kernel-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_NET_IF:
			pstr = "com.apple.private.skywalk.register-net-if";
			break;

		case PRIV_SKYWALK_REGISTER_FLOW_SWITCH:
			pstr = "com.apple.private.skywalk.register-flow-switch";
			break;

		case PRIV_SKYWALK_OBSERVE_ALL:
			pstr = "com.apple.private.skywalk.observe-all";
			break;

		case PRIV_SKYWALK_OBSERVE_STATS:
			pstr = "com.apple.private.skywalk.observe-stats";
			break;

		case PRIV_SKYWALK_LOW_LATENCY_CHANNEL:
			pstr = "com.apple.private.skywalk.low-latency-channel";
			break;

		default:
			pstr = "unknown";
			break;
		}

#if SK_LOG
		if (__priv_check) {
			SK_DF(SK_VERB_PRIV, "%s(%d) insufficient privilege %d "
			    "(\"%s\") err %d", sk_proc_name(p),
			    sk_proc_pid(p), priv, pstr, ret);
		} else {
			SK_DF(SK_VERB_PRIV, "%s(%d) IGNORING missing privilege "
			    "%d (\"%s\") err %d", sk_proc_name(p),
			    sk_proc_pid(p), priv, pstr, ret);
		}
#endif /* SK_LOG */

		/* ignore privilege check failures if requested */
		if (!__priv_check) {
			ret = 0;
		}
	}
#endif /* DEVELOPMENT || DEBUG */

	return ret;
}
590 
/*
 * Public wrapper around sk_priv_chk(): check whether the given
 * credential holds the specified Skywalk privilege.  Returns 0 on
 * success, or an errno on failure.
 */
int
skywalk_priv_check_cred(proc_t p, kauth_cred_t cred, int priv)
{
	return sk_priv_chk(p, cred, priv);
}
596 
#if CONFIG_MACF
/*
 * Ask the MAC framework whether the process may access the given
 * system-information type.  Takes and drops a reference on the
 * process credential around the check; returns the MAC result.
 */
int
skywalk_mac_system_check_proc_cred(proc_t p, const char *info_type)
{
	kauth_cred_t cred;
	int err;

	cred = kauth_cred_proc_ref(p);
	err = mac_system_check_info(cred, info_type);
	kauth_cred_unref(&cred);

	return err;
}
#endif /* CONFIG_MACF */
609 
610 /*
611  * Scan thru the list of privileges needed before we allow someone
612  * to open a handle to the Nexus controller.  This should be done
613  * at nxctl_create() time, and additional privilege check specific
614  * to the operation (e.g. register, etc.) should be done afterwards.
615  */
616 int
skywalk_nxctl_check_privileges(proc_t p,kauth_cred_t cred)617 skywalk_nxctl_check_privileges(proc_t p, kauth_cred_t cred)
618 {
619 	int ret = 0;
620 
621 	if (p == kernproc) {
622 		goto done;
623 	}
624 
625 	do {
626 		/*
627 		 * Check for observe-{stats,all} entitlements first
628 		 * before the rest, to account for nexus controller
629 		 * clients that don't need anything but statistics;
630 		 * it would help quiesce sandbox violation warnings.
631 		 */
632 		if ((ret = sk_priv_chk(p, cred,
633 		    PRIV_SKYWALK_OBSERVE_STATS)) == 0) {
634 			break;
635 		}
636 		if ((ret = sk_priv_chk(p, cred,
637 		    PRIV_SKYWALK_OBSERVE_ALL)) == 0) {
638 			break;
639 		}
640 		if ((ret = sk_priv_chk(p, cred,
641 		    PRIV_SKYWALK_REGISTER_USER_PIPE)) == 0) {
642 			break;
643 		}
644 		if ((ret = sk_priv_chk(p, cred,
645 		    PRIV_SKYWALK_REGISTER_KERNEL_PIPE)) == 0) {
646 			break;
647 		}
648 		if ((ret = sk_priv_chk(p, cred,
649 		    PRIV_SKYWALK_REGISTER_NET_IF)) == 0) {
650 			break;
651 		}
652 		if ((ret = sk_priv_chk(p, cred,
653 		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH)) == 0) {
654 			break;
655 		}
656 		/* none set, so too bad */
657 		ret = EPERM;
658 	} while (0);
659 
660 #if (DEVELOPMENT || DEBUG)
661 	if (ret != 0) {
662 		SK_ERR("%s(%d) insufficient privilege to open nexus controller "
663 		    "err %d", sk_proc_name(p), sk_proc_pid(p), ret);
664 	}
665 #endif /* !DEVELOPMENT && !DEBUG */
666 done:
667 	return ret;
668 }
669 
/*
 * Generate a guard identifier for a channel ("CH") or nexus ("NX")
 * object.  A 64-bit value is assembled from: 16 random bits, the
 * first two bytes of the object's UUID, the type signature, and a
 * constant 'SK' tag (signatures stored little-endian via htole16).
 * As much of that value as fits is copied into *guard.
 */
void
sk_gen_guard_id(boolean_t isch, const uuid_t uuid, guardid_t *guard)
{
#define GUARD_CH_SIG    0x4348  /* 'CH' */
#define GUARD_NX_SIG    0x4e58  /* 'NX' */
	union {
		uint8_t         _u8[8];
		uint16_t        _u16[4];
		uint64_t        _u64;
	} __u;

	/* word 0: random; word 1: leading two bytes of the UUID */
	read_random(&__u._u16[0], sizeof(uint16_t));
	bcopy(uuid, (void *)&__u._u16[1], sizeof(uint16_t));
	__u._u16[2] = htole16(isch ? GUARD_CH_SIG : GUARD_NX_SIG);
	__u._u16[3] = htole16(0x534b);  /* 'SK' */
	/* word 3 is a non-zero constant, so the whole value can never be 0 */
	VERIFY(__u._u64 != 0);

	bzero(guard, sizeof(*guard));
	bcopy((void *)&__u._u64, guard, MIN(sizeof(*guard),
	    sizeof(uint64_t)));
}
691 
692 
/*
 * Convenience wrapper around uuid_unparse_upper() that returns the
 * caller-supplied output buffer, so it can be used inline within
 * logging expressions.  "out" must hold sizeof(uuid_string_t) bytes.
 */
extern char *
__counted_by(sizeof(uuid_string_t))
sk_uuid_unparse(const uuid_t uu, uuid_string_t out)
{
	uuid_unparse_upper(uu, out);
	return out;
}
700 
#if SK_LOG
/*
 * packet-dump function into the shared static buffer (sk_dump_buf).
 * Emits a header line followed by a classic hexdump: offset, up to 16
 * hex byte pairs, then the printable-ASCII rendering of those bytes.
 * Returns sk_dump_buf.  Not reentrant (single static buffer).
 *
 * NOTE(review): the fixed 48/49-column line writes are not individually
 * bounds-checked against "size"; callers presumably cap dumplen so the
 * output fits within SK_DUMP_BUF_SIZE -- confirm at call sites.
 *
 * @param label
 *   prefix for the header line.
 * @param obj
 *   buffer to be dumped.
 * @param len
 *   buffer's total length.
 * @param dumplen
 *   length to be dumped (clamped to len).
 */
const char *
__counted_by(SK_DUMP_BUF_SIZE)
sk_dump(const char *label, const void *__sized_by(len) obj, int len, int dumplen)
{
	int i, j, i0, n = 0;
	static char hex[] = "0123456789abcdef";
	const char *p = obj;    /* dump cursor */
	uint32_t size;
	char *__sized_by(size) o;        /* output position */
	const int lim = SK_DUMP_BUF_SIZE;
	char* __counted_by(lim) dst = sk_dump_buf;


#define P_HI(x) hex[((x) & 0xf0) >> 4]
#define P_LO(x) hex[((x) & 0xf)]
#define P_C(x)  ((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')

	dumplen = MIN(len, dumplen);
	o = dst;
	size = lim;
	/* header: label, buffer address, total length, buffer limit */
	n = scnprintf(o, lim, "%s %p len %d lim %d\n", label,
	    SK_KVA(p), len, lim);
	o += strbuflen(o, n);
	size -= n;
	/* hexdump routine */
	for (i = 0; i < dumplen;) {
		n = scnprintf(o, size, "%5d: ", i);
		o += n;
		size -= n;
		/* blank out the 48-char hex area so short rows stay padded */
		memset(o, ' ', 48);
		i0 = i;
		/* hex pairs: 3 columns per byte (two digits + space) */
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j * 3] = P_HI(p[i]);
			o[j * 3 + 1] = P_LO(p[i]);
		}
		/* rewind and emit the ASCII column for the same 16 bytes */
		i = i0;
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j + 48] = P_C(p[i]);
		}
		o[j + 48] = '\n';
		o += j + 49;
		size -= (j + 49);
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
762 
763 /*
764  * "Safe" variant of proc_name_address(), meant to be used only for logging.
765  */
766 const char *
sk_proc_name(struct proc * p)767 sk_proc_name(struct proc *p)
768 {
769 	if (p == PROC_NULL) {
770 		return "proc_null";
771 	}
772 
773 	return proc_name_address(p);
774 }
775 
776 /*
777  * "Safe" variant of proc_pid(), mean to be used only for logging.
778  */
779 int
sk_proc_pid(struct proc * p)780 sk_proc_pid(struct proc *p)
781 {
782 	if (p == PROC_NULL) {
783 		return -1;
784 	}
785 
786 	return proc_pid(p);
787 }
788 
/*
 * Format an IPv4/IPv6 address into addr_str for logging.  When the
 * global inp_log_privacy is set, a redaction placeholder is written
 * instead of the real address (only AF_INET/AF_INET6 are accepted on
 * that path; anything else trips VERIFY).  Otherwise delegates to
 * inet_ntop().  Returns a pointer into addr_str (or inet_ntop()'s
 * return value).
 */
const char *
sk_ntop(int af, const void *addr, char *__counted_by(addr_strlen)addr_str,
    size_t addr_strlen)
{
	const char *__null_terminated str = NULL;

	addr_str[0] = '\0';

	if (inp_log_privacy != 0) {
		switch (af) {
		case AF_INET:
			strlcpy(addr_str, "<IPv4-redacted>", addr_strlen);
			break;
		case AF_INET6:
			strlcpy(addr_str, "<IPv6-redacted>", addr_strlen);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		str = __unsafe_null_terminated_from_indexable(addr_str);
	} else {
		str = inet_ntop(af, addr, addr_str, (socklen_t)addr_strlen);
	}

	return str;
}
816 
/*
 * sockaddr-based wrapper over sk_ntop(): extract the address from an
 * AF_INET/AF_INET6 sockaddr and format it into addr_str for logging.
 * For any other address family, addr_str is left as an empty string.
 */
const char *
sk_sa_ntop(struct sockaddr *sa, char *__counted_by(addr_strlen)addr_str,
    size_t addr_strlen)
{
	const char *__null_terminated str = NULL;

	addr_str[0] = '\0';

	switch (sa->sa_family) {
	case AF_INET:
		str = sk_ntop(AF_INET, &SIN(sa)->sin_addr.s_addr,
		    addr_str, (socklen_t)addr_strlen);
		break;

	case AF_INET6:
		str = sk_ntop(AF_INET6, &SIN6(sa)->sin6_addr,
		    addr_str, (socklen_t)addr_strlen);
		break;

	default:
		/* unknown family: return the (empty) buffer as-is */
		str = __unsafe_null_terminated_from_indexable(addr_str);
		break;
	}

	return str;
}
#endif /* SK_LOG */
844 
845 bool
sk_sa_has_addr(struct sockaddr * sa)846 sk_sa_has_addr(struct sockaddr *sa)
847 {
848 	switch (sa->sa_family) {
849 	case AF_INET:
850 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
851 		return SIN(sa)->sin_addr.s_addr != INADDR_ANY;
852 	case AF_INET6:
853 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
854 		return !IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr);
855 	default:
856 		return false;
857 	}
858 }
859 
860 bool
sk_sa_has_port(struct sockaddr * sa)861 sk_sa_has_port(struct sockaddr *sa)
862 {
863 	switch (sa->sa_family) {
864 	case AF_INET:
865 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
866 		return SIN(sa)->sin_port != 0;
867 	case AF_INET6:
868 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
869 		return SIN6(sa)->sin6_port != 0;
870 	default:
871 		return false;
872 	}
873 }
874 
875 /* returns port number in host byte order */
876 uint16_t
sk_sa_get_port(struct sockaddr * sa)877 sk_sa_get_port(struct sockaddr *sa)
878 {
879 	switch (sa->sa_family) {
880 	case AF_INET:
881 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
882 		return ntohs(SIN(sa)->sin_port);
883 	case AF_INET6:
884 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
885 		return ntohs(SIN6(sa)->sin6_port);
886 	default:
887 		VERIFY(0);
888 		/* NOTREACHED */
889 		__builtin_unreachable();
890 	}
891 }
892 
/*
 * Terminate a (non-kernel) process with SIGABRT, attaching a Skywalk
 * exit reason so the failure is captured in a crash report.  If the
 * exit reason cannot be allocated, the signal is still delivered,
 * just without the reason payload.
 */
void
skywalk_kill_process(struct proc *p, uint64_t reason_code)
{
	os_reason_t exit_reason = OS_REASON_NULL;

	VERIFY(p != kernproc);

	exit_reason = os_reason_create(OS_REASON_SKYWALK, reason_code);
	if (exit_reason == OS_REASON_NULL) {
		SK_ERR("%s(%d) unable to allocate memory for crash reason "
		    "0x%llX", sk_proc_name(p), sk_proc_pid(p),
		    reason_code);
	} else {
		exit_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
		SK_ERR("%s(%d) aborted for reason 0x%llX",
		    sk_proc_name(p), sk_proc_pid(p), reason_code);
	}

	psignal_try_thread_with_reason(p, current_thread(), SIGABRT,
	    exit_reason);
}
914 
#if (DEVELOPMENT || DEBUG)
#define SK_MEMCMP_LEN 256               /* length of each section */
#define SK_MASK_MAXLEN 80               /* maximum mask length */

/*
 * Self-test helper: cross-check both the optimized and the scalar
 * sk_memcmp_mask_<l>B variants against the reference implementation
 * (skywalk_memcmp_mask_ref) for mask table "t"; the XOR of the two
 * match/mismatch outcomes detects any divergence, and a divergence
 * panics the kernel.  hdr1/hdr2/i are expected in the caller's scope.
 */
#define SK_MEMCMP_MASK_VERIFY(t, l, lr) do {                            \
	static_assert(sizeof(t##_m) == SK_MASK_MAXLEN);                      \
	if ((sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) ^           \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l " at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) ^  \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l "_scalar at i=%d\n", i);       \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Self-test helper: assert that both masked-compare variants report a
 * MATCH for hdr1/hdr2 under mask table "t"; panic otherwise.
 */
#define SK_MEMCMP_MASK_MATCH_VERIFY(t, l) do {                          \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " mismatch (expected match) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar mismatch (expected match) at i=%d s1=0x%x" \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
953 
954 #define SK_MEMCMP_MASK_MISMATCH_VERIFY(t, l) do {                       \
955 	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) == 0) {            \
956 	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
957 	            " match (expected mismatch) at i=%d s1=0x%x"        \
958 	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
959 	/* NOTREACHED */                                        \
960 	        __builtin_unreachable();                                \
961 	}                                                               \
962 	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) == 0) {   \
963 	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
964 	            "_scalar match (expected mismatch) at i=%d "        \
965 	            "s1=0x%x s2=0x%x\n", i, hdr1[i], hdr2[i]);          \
966 	/* NOTREACHED */                                        \
967 	        __builtin_unreachable();                                \
968 	}                                                               \
969 } while (0)
970 
971 #define SK_MEMCMP_BYTEMASK_VERIFY(t) do {                               \
972 	if ((sk_memcmp_mask(hdr1, hdr2, t##_m, i) != 0) ^               \
973 	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
974 	        panic_plain("\nbroken: " #t " using "                   \
975 	            "sk_memcmp_mask at i=%d\n", i);                     \
976 	/* NOTREACHED */                                        \
977 	        __builtin_unreachable();                                \
978 	}                                                               \
979 	if ((sk_memcmp_mask_scalar(hdr1, hdr2, t##_m, i) != 0) ^        \
980 	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
981 	        panic_plain("\nbroken: " #t " using "                   \
982 	            "sk_memcmp_mask_scalar at i=%d\n", i);              \
983 	/* NOTREACHED */                                        \
984 	        __builtin_unreachable();                                \
985 	}                                                               \
986 } while (0)
987 
988 static inline int
skywalk_memcmp_mask_ref(const uint8_t * __sized_by (n)src1,const uint8_t * __sized_by (n)src2,const uint8_t * __sized_by (n)byte_mask,size_t n)989 skywalk_memcmp_mask_ref(const uint8_t *__sized_by(n)src1,
990     const uint8_t *__sized_by(n)src2, const uint8_t *__sized_by(n)byte_mask,
991     size_t n)
992 {
993 	uint32_t result = 0;
994 	for (size_t i = 0; i < n; i++) {
995 		result |= (src1[i] ^ src2[i]) & byte_mask[i];
996 	}
997 	return result;
998 }
999 
/*
 * Boot-time self-tests for the masked-compare primitives.  Exercises
 * the fixed-length sk_memcmp_mask_{16,32,48,64,80}B() variants (vector
 * and scalar) and the variable-length sk_memcmp_mask() against the
 * portable skywalk_memcmp_mask_ref(), and sanity-checks the static
 * flow-key mask templates (fk_mask_*).  Any failure panics; success
 * prints "PASSED".  Scratch space is carved out of sk_dump_buf.
 */
static void
skywalk_memcmp_mask_self_tests(void)
{
	/*
	 * Static mask tables, each SK_MASK_MAXLEN (80) bytes as enforced
	 * by the static_assert in SK_MEMCMP_MASK_VERIFY.  A 0xff byte
	 * selects that position for comparison; 0x00 ignores it.  The
	 * names suggest the protocol header layouts they mimic (IPv4,
	 * IPv6, TCP, UDP, flow-key tuples) — presumably mirroring the
	 * masks used by the flowswitch classifier; the test itself only
	 * needs them as representative bit patterns.
	 */
	static const uint8_t ipv4_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xff, 0xff,
		0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t udp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_all_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t2_m[] = {
		0x0a, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t3_m[] = {
		0x0f, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t4_m[] = {
		0x2f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t5_m[] = {
		0x3f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i1_m[] = {
		0x02, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i2_m[] = {
		0x07, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i3_m[] = {
		0x17, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};

	/*
	 * Validate the static flow-key mask templates field by field:
	 * each fk_mask_* must have exactly the fields implied by its
	 * FKMASK_* bits set to all-ones and every other field zero.
	 */
	/* validate flow entry mask (2-tuple) */
	static_assert(FKMASK_2TUPLE == (FKMASK_PROTO | FKMASK_SPORT));
	VERIFY(fk_mask_2tuple.fk_mask == FKMASK_2TUPLE);
	VERIFY(fk_mask_2tuple.fk_ipver == 0);
	VERIFY(fk_mask_2tuple.fk_proto == 0xff);
	VERIFY(fk_mask_2tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_2tuple.fk_dport == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_pad[0] == 0);

	static_assert(FKMASK_3TUPLE == (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_3tuple.fk_mask == FKMASK_3TUPLE);
	VERIFY(fk_mask_3tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_3tuple.fk_proto == 0xff);
	VERIFY(fk_mask_3tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_3tuple.fk_dport == 0);
	VERIFY(fk_mask_3tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_3tuple.fk_pad[0] == 0);

	static_assert(FKMASK_4TUPLE == (FKMASK_3TUPLE | FKMASK_DPORT));
	VERIFY(fk_mask_4tuple.fk_mask == FKMASK_4TUPLE);
	VERIFY(fk_mask_4tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_4tuple.fk_proto == 0xff);
	VERIFY(fk_mask_4tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_4tuple.fk_pad[0] == 0);

	static_assert(FKMASK_5TUPLE == (FKMASK_4TUPLE | FKMASK_DST));
	VERIFY(fk_mask_5tuple.fk_mask == FKMASK_5TUPLE);
	VERIFY(fk_mask_5tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_5tuple.fk_proto == 0xff);
	VERIFY(fk_mask_5tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_pad[0] == 0);

	static_assert(FKMASK_IPFLOW1 == FKMASK_PROTO);
	VERIFY(fk_mask_ipflow1.fk_mask == FKMASK_IPFLOW1);
	VERIFY(fk_mask_ipflow1.fk_ipver == 0);
	VERIFY(fk_mask_ipflow1.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow1.fk_sport == 0);
	VERIFY(fk_mask_ipflow1.fk_dport == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_pad[0] == 0);

	static_assert(FKMASK_IPFLOW2 == (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_ipflow2.fk_mask == FKMASK_IPFLOW2);
	VERIFY(fk_mask_ipflow2.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow2.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow2.fk_sport == 0);
	VERIFY(fk_mask_ipflow2.fk_dport == 0);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow2.fk_pad[0] == 0);

	static_assert(FKMASK_IPFLOW3 == (FKMASK_IPFLOW2 | FKMASK_DST));
	VERIFY(fk_mask_ipflow3.fk_mask == FKMASK_IPFLOW3);
	VERIFY(fk_mask_ipflow3.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow3.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow3.fk_sport == 0);
	VERIFY(fk_mask_ipflow3.fk_dport == 0);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_pad[0] == 0);

	VERIFY(sk_dump_buf != NULL);

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is SK_MEMCMP_LEN-bytes long,
	 * which means we need at least 16+(3*SK_MEMCMP_LEN) bytes.
	 *
	 * 1st section is s1 -> (hdr1 aligned to 16-bytes)
	 * 2nd section is s2 -> (hdr2 = hdr1 + SK_MEMCMP_LEN)
	 * 3rd section is s3 -> (mask = hdr2 + SK_MEMCMP_LEN)
	 */
	void *s1, *s2, *s3;
	uintptr_t diff;

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		/* bump s1 up to the next SK_DUMP_BUF_ALIGN boundary */
		diff = P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN) - (uintptr_t)s1;
		s1 = (void *)((char *)s1 + diff);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((char *)s1 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((char *)s2 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	uint8_t *hdr1 = s1;
	uint8_t *hdr2 = s2;
	uint8_t *byte_m = s3;

	/* fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	kprintf("Skywalk: memcmp mask ... ");

	/*
	 * Phase 1: walk a single differing byte (hdr1[i]=1 vs hdr2[i]=0)
	 * across the first 80 positions and cross-check every fixed-size
	 * variant against the reference; hdr2[i] is set afterwards so
	 * the buffers re-converge behind the moving index.
	 */
	int i;
	for (i = 0; i < 80; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_MASK_VERIFY(ipv4, 32, 20);
		SK_MEMCMP_MASK_VERIFY(ipv6, 64, 40);
		SK_MEMCMP_MASK_VERIFY(ipv6_tcp, 80, 64);
		SK_MEMCMP_MASK_VERIFY(tcp, 32, 24);
		SK_MEMCMP_MASK_VERIFY(udp, 16, 6);
		SK_MEMCMP_MASK_VERIFY(fk_all, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t3, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t4, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t5, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i1, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i3, 48, 48);
		hdr2[i] = 1;
	}

	bzero(hdr1, SK_MEMCMP_LEN);
	bzero(hdr2, SK_MEMCMP_LEN);

	/* re-fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	/*
	 * Phase 2: same moving-difference walk over the full section,
	 * exercising the variable-length sk_memcmp_mask() with length i
	 * against a random byte mask.
	 */
	for (i = 0; i < SK_MEMCMP_LEN; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_BYTEMASK_VERIFY(byte);
		hdr2[i] = 1;
	}

	/* fill hdr1 and hdr2 with random data */
	read_frandom(hdr1, SK_MEMCMP_LEN);
	bcopy(hdr1, hdr2, SK_MEMCMP_LEN);
	memset(byte_m, 0xff, SK_MEMCMP_LEN);

	/*
	 * Phase 3: with identical buffers and an all-ones mask, force a
	 * single differing byte at index i and check that only the
	 * fixed-size variants whose compare window covers i report a
	 * mismatch; then clear the mask byte at i and verify everything
	 * matches again.  Original byte/mask values are restored at the
	 * end of each iteration.
	 */
	for (i = 0; i < 80; i++) {
		uint8_t val = hdr2[i];
		uint8_t mval = byte_m[i];

		/* pick a byte that is both non-zero and != hdr1[i] */
		while (hdr2[i] == hdr1[i] || hdr2[i] == 0) {
			uint8_t newval;
			read_frandom(&newval, sizeof(newval));
			hdr2[i] = newval;
		}
		if (i < 16) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 80);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		}
		/* mask out the differing byte: must match again */
		byte_m[i] = 0;
		if (i < 16) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 80);
		}
		hdr2[i] = val;
		byte_m[i] = mval;
	}

	kprintf("PASSED\n");
}
1367 
#define SK_COPY_LEN     128             /* length of each section */

/*
 * Set up one copy self-test: zero both targets (s2 = bcopy reference,
 * s3 = routine under test), snapshot the three section pointers so
 * SK_COPY_VERIFY can detect clobbered input registers, and announce
 * the test name.  s1/s2/s3 and _s1/_s2/_s3 live in the caller's scope.
 */
#define SK_COPY_PREPARE(t) do {                                         \
	bzero(s2, SK_COPY_LEN);                                         \
	bzero(s3, SK_COPY_LEN);                                         \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

/*
 * Check one copy self-test: panic if the section pointers were
 * modified or if the tested routine's output (s3) differs from the
 * bcopy reference (s2); otherwise report PASSED.
 */
#define SK_COPY_VERIFY(t) do {                                          \
	if (_s1 != s1 || _s2 != s2 || _s3 != s3) {                      \
	        panic_plain("\ninput registers clobbered: " #t "\n");   \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (bcmp(s2, s3, SK_COPY_LEN) != 0) {                           \
	        panic_plain("\nbroken: " #t "\n");                      \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}  else {                                                        \
	        kprintf("PASSED\n");                                    \
	}                                                               \
} while (0)

/*
 * Set up one zero self-test: seed both targets with the random source
 * section s1 (so untouched bytes must survive), snapshot the section
 * pointers, and announce the test name.
 */
#define SK_ZERO_PREPARE(t) do {                                         \
	bcopy(s1, s2, SK_COPY_LEN);                                     \
	bcopy(s1, s3, SK_COPY_LEN);                                     \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

/* zero-test verification is identical to the copy-test verification */
#define SK_ZERO_VERIFY(t)       SK_COPY_VERIFY(t)
1400 
/*
 * Boot-time self-tests for the Skywalk optimized copy (sk_copy*) and
 * zero (sk_zero*) primitives, each checked against a bcopy/bzero
 * reference via the SK_COPY_*/SK_ZERO_* macros, followed by the
 * masked-compare self-tests and compile-time checks that the ktrace
 * codes stay in sync with the ariadne plist.  Any failure panics.
 */
static void
skywalk_self_tests(void)
{
	void *s1, *s2, *s3;
	void *_s1, *_s2, *_s3;	/* snapshots used to detect pointer clobbering */
	uintptr_t diff;

	VERIFY(sk_dump_buf != NULL);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is 128-bytes long, which means we
	 * need at least 16+(3*128) bytes.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bcopy;
	 * 3rd section is test target base on our stuff.
	 */
	static_assert(SK_COPY_LEN != 0 && (SK_COPY_LEN % 128) == 0);
	static_assert((SK_COPY_LEN % 16) == 0);
	static_assert((SK_DUMP_BUF_ALIGN % 16) == 0);
	static_assert(SK_DUMP_BUF_SIZE >= (SK_DUMP_BUF_ALIGN + (SK_COPY_LEN * 3)));

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		/* bump s1 up to the next SK_DUMP_BUF_ALIGN boundary */
		diff = P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN) - (uintptr_t)s1;
		s1 = (void *)((char *)s1 + diff);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((char *)s1 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((char *)s2 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	/* fill s1 with random data */
	read_frandom(s1, SK_COPY_LEN);

	kprintf("Skywalk: running self-tests\n");

	/* Copy 8-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8);
	bcopy(s1, s2, 8);
	sk_copy64_8((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_8);

	/* Copy 8-bytes, 32-bit aligned (offset by 4 to break 64-bit alignment) */
	SK_COPY_PREPARE(sk_copy32_8);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 8);
	sk_copy32_8((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_8);

	/* Copy 16-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_16);
	bcopy(s1, s2, 16);
	sk_copy64_16((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_16);

	/* Copy 16-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_16);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 16);
	sk_copy32_16((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_16);

	/* Copy 20-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_20);
	bcopy(s1, s2, 20);
	sk_copy64_20((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_20);

	/* Copy 24-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_24);
	bcopy(s1, s2, 24);
	sk_copy64_24((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_24);

	/* Copy 32-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32);
	bcopy(s1, s2, 32);
	sk_copy64_32((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_32);

	/* Copy 32-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_32);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 32);
	sk_copy32_32((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_32);

	/* Copy 40-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_40);
	bcopy(s1, s2, 40);
	sk_copy64_40((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_40);

	/* Copy entire section in 64-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_64x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_64x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_64x);

	/* Copy entire section in 32-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_32x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_32x);

	/* Copy entire section in 8-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_8x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_8x);

	/* Copy entire section in 4-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_4x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_4x((uint32_t *)s1, (uint32_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_4x);

	/*
	 * Re-use sk_dump_buf for testing sk_zero, same principle as above.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bzero;
	 * 3rd section is test target base on our stuff.
	 */
	SK_ZERO_PREPARE(sk_zero_16);
	bzero(s2, 16);
	sk_zero_16(s3);
	SK_ZERO_VERIFY(sk_zero_16);

	SK_ZERO_PREPARE(sk_zero_32);
	bzero(s2, 32);
	sk_zero_32(s3);
	SK_ZERO_VERIFY(sk_zero_32);

	SK_ZERO_PREPARE(sk_zero_48);
	bzero(s2, 48);
	sk_zero_48(s3);
	SK_ZERO_VERIFY(sk_zero_48);

	SK_ZERO_PREPARE(sk_zero_128);
	bzero(s2, 128);
	sk_zero_128(s3);
	SK_ZERO_VERIFY(sk_zero_128);

	/* Perform memcmp with mask self tests */
	skywalk_memcmp_mask_self_tests();

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/* Keep packet trace code in sync with ariadne plist */
	static_assert(SK_KTRACE_AON_IF_STATS == 0x8100004);

	static_assert(SK_KTRACE_FSW_DEV_RING_FLUSH == 0x8110004);
	static_assert(SK_KTRACE_FSW_USER_RING_FLUSH == 0x8110008);
	static_assert(SK_KTRACE_FSW_FLOW_TRACK_RTT == 0x8110010);

	static_assert(SK_KTRACE_NETIF_RING_TX_REFILL == 0x8120004);
	static_assert(SK_KTRACE_NETIF_HOST_ENQUEUE == 0x8120008);
	static_assert(SK_KTRACE_NETIF_MIT_RX_INTR == 0x812000c);
	static_assert(SK_KTRACE_NETIF_COMMON_INTR == 0x8120010);
	static_assert(SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT == 0x8120014);
	static_assert(SK_KTRACE_NETIF_RX_NOTIFY_FAST == 0x8120018);

	static_assert(SK_KTRACE_CHANNEL_TX_REFILL == 0x8130004);

	static_assert(SK_KTRACE_PKT_RX_DRV == 0x8140004);
	static_assert(SK_KTRACE_PKT_RX_FSW == 0x8140008);
	static_assert(SK_KTRACE_PKT_RX_CHN == 0x814000c);
	static_assert(SK_KTRACE_PKT_TX_FSW == 0x8140040);
	static_assert(SK_KTRACE_PKT_TX_AQM == 0x8140044);
	static_assert(SK_KTRACE_PKT_TX_DRV == 0x8140048);
}
1581 #endif /* DEVELOPMENT || DEBUG */
1582