/*
 * Copyright (c) 2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm_acle.h>
#include <libproc.h>
#include <signal.h>
#include <spawn_private.h>
#include <stddef.h>
#include <stdlib.h>

#include <mach/mach.h>
#include <mach/mach_init.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>

#include <darwintest.h>

#include <stdint.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#define HAS_MTE 1

#include <vm/vm_compressor_info.h>

#include "arm_mte_utilities.h"
#include "test_utils.h"

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.arm"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("arm"),
	T_META_OWNER("s_shalom"),
	T_META_RUN_CONCURRENTLY(false) /* test is sampling global sysctls */
	);

/*
 * This test exercises the internal functions that compress and decompress the MTE tag buffers.
 * These functions are not accessible from outside the kernel, so we include them here verbatim.
 * In the future, when there is a user-land unit-test framework that runs kernel code, these tests can move there.
 */
#define COMPRESSOR_TESTER 1
#define DEVELOPMENT 1

// these need to be redefined since getting them from the XNU headers would create include conflicts
#define C_MTE_SIZE 512
#define C_SEG_OFFSET_ALIGNMENT_MASK (0x3FULL)
#define C_SEG_OFFSET_ALIGNMENT_BOUNDARY  (64)

#define C_SLOT_EXTRA_METADATA           16            /* 16 possible tags */
#define C_SLOT_C_MTE_SIZE_MAX           (C_MTE_SIZE + C_SLOT_EXTRA_METADATA + 1)

#define VM_MEMTAG_PTR_SIZE         56
#define VM_MEMTAG_TAG_SIZE          4
#define VM_MEMTAG_UPPER_SIZE        4
#define VM_MEMTAG_BYTES_PER_TAG    16

#define C_SEG_ROUND_TO_ALIGNMENT(offset) \
	(((offset) + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK)
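// e.g. C_SEG_ROUND_TO_ALIGNMENT(1) == 64 and C_SEG_ROUND_TO_ALIGNMENT(65) == 128:
// sizes are rounded up to the next 64-byte segment-offset boundary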

#define __assert_only                   __unused

#include "../osfmk/arm64/vm_mte_compress.c"

// masks only the tag bits out of a pointer
#define TAG_MASK (((1UL << VM_MEMTAG_TAG_SIZE) - 1UL) << VM_MEMTAG_PTR_SIZE)
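// with VM_MEMTAG_PTR_SIZE == 56 and VM_MEMTAG_TAG_SIZE == 4 this selects bits [56..59],
// i.e. TAG_MASK == 0x0F00000000000000: the tag nibble just above the 56 address bits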


static void
show_buf_diff(const uint8_t* a, const uint8_t* b, size_t sz)
{
	for (uint32_t i = 0; i < sz; ++i) {
		if (a[i] != b[i]) {
			T_LOG("  byte diff at %u : %d != %d", i, (int)a[i], (int)b[i]);
			break;
		}
	}
}

// expected results of vm_mte_rle_compress_tags()
#define CASE_UNKNOWN 0
#define CASE_NON_COMP 1
#define CASE_SINGLE_TAG 2
#define CASE_NORMAL 3
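// the return value of vm_mte_rle_compress_tags() encodes the outcome: a value below
// C_MTE_SIZE is a normal compression, exactly C_MTE_SIZE means incompressible, and a
// value above C_MTE_SIZE marks the single-tag ("SV") case (see the CASE_* checks in
// test_compress_decompress_eq() and the bucketing in gen_test() below)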

static uint32_t
test_compress_decompress_eq(const uint8_t *buf, const char *desc, int expect_case)
{
	uint8_t compressed[C_MTE_SIZE] = {};
	uint32_t compressed_size = vm_mte_rle_compress_tags((uint8_t *)buf, C_MTE_SIZE, compressed, C_MTE_SIZE);
	if ((expect_case == CASE_NON_COMP && compressed_size != C_MTE_SIZE) ||
	    (expect_case == CASE_SINGLE_TAG && compressed_size <= C_MTE_SIZE) ||
	    (expect_case == CASE_NORMAL && compressed_size >= C_MTE_SIZE)) {
		T_ASSERT_FAIL("case %s", desc);
	}

	uint8_t decompressed[C_MTE_SIZE] = {};
	bool ret = vm_mte_rle_decompress_tags(compressed, compressed_size, decompressed, C_MTE_SIZE);
	if (!ret) {
		show_buf_diff(buf, decompressed, C_MTE_SIZE);
		T_ASSERT_FAIL("decompress return %s", desc);
	}

	if (memcmp((char*)buf, (char*)decompressed, C_MTE_SIZE) != 0) {
		show_buf_diff(buf, decompressed, C_MTE_SIZE);
		T_ASSERT_FAIL("decompress equal original %s", desc);
	} else {
		bool quiet = (desc[0] == '_'); // don't want to spam the console during the many random runs
		if (quiet) {
			T_QUIET;
		}
		T_PASS("OK %s  (size=%u)", desc, compressed_size);
	}

	return compressed_size;
}


static void
simple_tests(void)
{
	uint8_t buf[C_MTE_SIZE] = {};
	test_compress_decompress_eq(buf, "zeros", CASE_SINGLE_TAG);

	buf[0] = 0x01;
	test_compress_decompress_eq(buf, "simple 1", CASE_NORMAL);

	memset(buf, 0x22, C_MTE_SIZE);
	test_compress_decompress_eq(buf, "twos", CASE_SINGLE_TAG);

	buf[0] = 0x21;
	test_compress_decompress_eq(buf, "simple 2", CASE_NORMAL);

	buf[0] = 0x01;
	test_compress_decompress_eq(buf, "simple 3", CASE_NORMAL);

	buf[0] = 0x11;
	test_compress_decompress_eq(buf, "simple 4", CASE_NORMAL);

	buf[0] = 0x31;
	test_compress_decompress_eq(buf, "simple 5", CASE_NORMAL);

	buf[1] = 0x01;
	test_compress_decompress_eq(buf, "simple 6", CASE_NORMAL);

	buf[0] = 0x11;
	buf[1] = 0x11;
	test_compress_decompress_eq(buf, "simple 7", CASE_NORMAL);

	buf[2] = 0x01;
	test_compress_decompress_eq(buf, "simple 8", CASE_NORMAL);

	buf[2] = 0x11;
	test_compress_decompress_eq(buf, "simple 9", CASE_NORMAL);

	buf[3] = 0x01;
	test_compress_decompress_eq(buf, "simple 10", CASE_NORMAL);

	buf[3] = 0x11;
	test_compress_decompress_eq(buf, "simple 11", CASE_NORMAL);

	buf[3] = 0x21;
	test_compress_decompress_eq(buf, "simple 12", CASE_NORMAL);

	buf[3] = 0x12;
	test_compress_decompress_eq(buf, "simple 13", CASE_NORMAL);

	memset(buf, 0x22, C_MTE_SIZE);
	buf[255] = 0x01;
	test_compress_decompress_eq(buf, "simple 14", CASE_NORMAL);
	buf[255] = 0x21;
	test_compress_decompress_eq(buf, "simple 15", CASE_NORMAL);
	buf[255] = 0x12;
	test_compress_decompress_eq(buf, "simple 16", CASE_NORMAL);

	for (int i = 0; i < C_MTE_SIZE; ++i) {
		buf[i] = i % 16;
	}
	test_compress_decompress_eq(buf, "non-comp", CASE_NON_COMP);

	memset(buf, 0x22, C_MTE_SIZE);
	buf[0] = 0x11;
	buf[1] = 0x01;
	buf[2] = 0x10;
	buf[3] = 0x11;
	buf[4] = 0x01;
	buf[5] = 0x00;
	test_compress_decompress_eq(buf, "simple 17", CASE_NORMAL);
}

// run compress-decompress with input generated by the given callback
static void
gen_test(const char* name, int num_runs, int min_opt, int max_opt, void (^generate)(uint8_t *buf, int num_options))
{
	uint8_t buf[C_MTE_SIZE] = {};
	uint32_t count_incomp = 0, count_normal = 0, count_single = 0;
	uint64_t sum_normal = 0;

	for (int num_options = min_opt; num_options <= max_opt; ++num_options) {
		for (int run = 0; run < num_runs; ++run) {
			generate(buf, num_options);

			uint32_t sz = test_compress_decompress_eq(buf, name, CASE_UNKNOWN);
			if (sz == C_MTE_SIZE) {
				count_incomp++;
			} else if (sz < C_MTE_SIZE) {
				count_normal++;
				sum_normal += sz;
			} else {
				count_single++;
			}
		}
	}

	// guard against division by zero when no run compressed normally
	T_LOG("%s: incompressible:%u  normal:%u (avg=%llu) sv:%u", name, count_incomp, count_normal,
	    count_normal ? sum_normal / count_normal : 0, count_single);
}

static uint32_t rng_state = 0;
static void
my_srand(uint32_t seed)
{
	rng_state = seed;
}
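// minimal LCG (the classic C rand() constants); seeded explicitly so that every run
// generates the same pseudo-random inputs and failures are reproducible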
static uint32_t
my_rand()
{
	rng_state = (rng_state * 1103515245) + 12345;
	uint32_t r = (rng_state >> 15);
	return r;
}


// fill a tag buffer with random tags, each in the range [0, num_options)
static void
random_tags_buf(uint8_t *buf, int num_options)
{
	T_QUIET; T_ASSERT_TRUE(num_options > 0 && num_options <= 16, "unexpected num_options");
	for (int i = 0; i < C_MTE_SIZE; ++i) {
		uint8_t tag1 = (uint8_t)(my_rand() % num_options);
		uint8_t tag2 = (uint8_t)(my_rand() % num_options);
		buf[i] = (uint8_t)((tag1 << 4) | tag2);
	}
}

// fill a tag buffer with runs of tags, each tag in [0, num_options) and each run up to `max_run` tags long
static void
random_tag_runs_buf(uint8_t *buf, int num_options, int max_run)
{
	T_QUIET; T_ASSERT_GE(max_run, 1, "unexpected max_run"); // sanity
	T_QUIET; T_ASSERT_TRUE(num_options > 0 && num_options <= 16, "unexpected num_options");

	uint8_t cur_tag = 0;
	int cur_run = 0;
	int cur_repeat = 0; // will be set on the first iteration
	for (int i = 0; i < C_MTE_SIZE; ++i) {
		uint8_t tags[2];
		for (int ti = 0; ti < 2; ++ti) {
			if (cur_run == cur_repeat) {
				cur_repeat = (my_rand() % max_run) + 1;
				cur_tag = (uint8_t)(my_rand() % num_options);
				cur_run = 0;
			}
			tags[ti] = cur_tag;
			++cur_run;
		}
		buf[i] = (uint8_t)((tags[1] << 4) | tags[0]);
	}
}

#define TAGS_IN_PAGE (C_MTE_SIZE * 2)
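// (each byte of a tag buffer packs two 4-bit tags, so C_MTE_SIZE tag bytes describe
// TAGS_IN_PAGE granules of VM_MEMTAG_BYTES_PER_TAG bytes: 16 KB, one page on these devices)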

// fill a buffer with runs of incrementing tags, each run `num_repeat` tags long
static void
same_repeat_buf(uint8_t *buf, int num_repeat)
{
	T_QUIET; T_ASSERT_TRUE(num_repeat >= 1 && num_repeat <= TAGS_IN_PAGE, "unexpected num_repeat");
	uint8_t cur_tag = 0;
	int cur_run = 0;
	for (int i = 0; i < C_MTE_SIZE; ++i) {
		uint8_t tags[2];
		for (int ti = 0; ti < 2; ++ti) {
			if (cur_run == num_repeat) {
				cur_tag = (cur_tag + 1) % 0xf;
				cur_run = 0;
			}
			tags[ti] = cur_tag;
			++cur_run;
		}
		buf[i] = (uint8_t)((tags[1] << 4) | tags[0]);
	}
}

// fill a buffer with the same tag
static void
same_tag_buf(uint8_t *buf, int num_options)
{
	T_QUIET; T_ASSERT_TRUE(num_options >= 0 && num_options <= 16, "unexpected num_options");
	for (int i = 0; i < C_MTE_SIZE; ++i) {
		uint8_t tag = (uint8_t)num_options;
		buf[i] = (uint8_t)((tag << 4) | tag);
	}
}

static void
random_bytes_test(void)
{
	my_srand(0);
	gen_test("_rand_bytes", 10000, 2, 16, ^void (uint8_t *buf, int num_options) {
		random_tags_buf(buf, num_options);
	});
}

static void
random_runs_test(int max_run)
{
	my_srand(0);
	gen_test("_rand_runs", 10000, 2, 16, ^void (uint8_t *buf, int num_options) {
		random_tag_runs_buf(buf, num_options, max_run);
	});
}

static void
same_tag_test(void)
{
	gen_test("_same_tag", 1, 0, 16, ^void (uint8_t *buf, int num_options) {
		same_tag_buf(buf, num_options);
	});
}

static void
every_repeat_len(void)
{
	gen_test("_every_repeat", 1, 1, TAGS_IN_PAGE, ^void (uint8_t *buf, int num_options) {
		same_repeat_buf(buf, num_options);
	});
}

T_DECL(mte_compress_tags,
    "Test the MTE tags buffer compression and decompression functions")
{
	simple_tests();
	random_bytes_test();
	random_runs_test(C_MTE_SIZE);
	random_runs_test(C_MTE_SIZE / 2);
	random_runs_test(20);
	random_runs_test(3);
	same_tag_test();
	every_repeat_len();
}


static void
test_malformed(const uint8_t* compressed, uint32_t compressed_size, bool expected, const char* desc, uint32_t desc_arg)
{
	char decompressed[C_MTE_SIZE] = {};
	bool ret = vm_mte_rle_decompress_tags((uint8_t*)compressed, compressed_size, (uint8_t*)decompressed, C_MTE_SIZE);
	T_QUIET; T_ASSERT_EQ(ret, expected, "malformed decompressed %s %d", desc, desc_arg);
	T_PASS("OK %s %d", desc, desc_arg);
}

static void
simple_malformed(void)
{
	uint8_t buf[C_MTE_SIZE] = {};

	buf[0] = 0x01;
	test_malformed(buf, 1, false, "underflow only 1 byte output", 0);

	buf[0] = 0xF1;
	buf[1] = 0xF2; // filled 1024 nibbles
	test_malformed(buf, 2, true, "no overflow at edge", 0);

	// filled all the output, but there's another command that would overflow
	buf[2] = 0x03;
	test_malformed(buf, 3, false, "overflow by 1 nibble", 0);

	for (uint32_t i = 1; i <= 0xF; ++i) {
		buf[2] = (uint8_t)(0x3 | (i << 4)); // every command should cause overflow
		test_malformed(buf, 3, false, "overflow at edge", i);
	}

	buf[0] = 0xF1; // 512
	buf[1] = 0xE2; // + 256
	buf[2] = 0xD3; // + 128
	buf[3] = 0xC4; // + 64
	buf[4] = 0xB5; // + 32
	buf[5] = 0xA6; // + 16
	buf[6] = 0x97; // + 8
	buf[7] = 0x78; // + 6 = filled 1022 nibbles
	test_malformed(buf, 8, false, "underflow missing 2 nibbles", 0);
	buf[7] = 0x88; // + 7 = filled 1023 nibbles
	test_malformed(buf, 8, false, "underflow missing 1 nibble", 0);
	buf[8] = 0x09; // + 1 = filled 1024 nibbles
	test_malformed(buf, 9, true, "no overflow from mid", 0);

	for (uint32_t i = 1; i <= 0xF; ++i) {
		buf[8] = (uint8_t)(0x9 | (i << 4));
		test_malformed(buf, 9, false, "overflow at mid", i);
	}
}

static void
random_malformed(int num_runs)
{
	uint8_t buf[C_MTE_SIZE] = {};
	int fail = 0, success = 0;
	my_srand(0);
	for (int run = 0; run < num_runs; ++run) {
		// fill buf with random bytes
		uint32_t sz = my_rand() % C_MTE_SIZE;
		for (uint32_t i = 0; i < sz; ++i) {
			buf[i] = my_rand() % 0xFF;
		}

		uint8_t decompressed[C_MTE_SIZE] = {};
		bool ret = vm_mte_rle_decompress_tags(buf, sz, decompressed, C_MTE_SIZE);
		// don't know if it's going to succeed or fail.
		// we're testing that it doesn't assert or hang
		if (ret) {
			++success;
		} else {
			++fail;
		}
	}
	T_QUIET; T_ASSERT_TRUE(fail > num_runs * 0.9 && fail < num_runs, "too many succeeded or failed %u,%u", success, fail);
	T_PASS("OK random %u success, %u fail", success, fail);
}

T_DECL(mte_decompress_malformed,
    "Test that tags decompression returns an error on malformed input")
{
	simple_malformed();
	random_malformed(1000000);
}


// This test isn't really useful for automatic testing, so it is disabled. It is useful for on-desk testing
// while trying to optimize these functions. For full optimization, add this to the Makefile:
//   arm_mte_compress: CFLAGS += -O3
T_DECL(mte_compress_tags_perf,
    "Test performance of MTE tags compression",
    T_META_ENABLED(false))
{
	my_srand(0);
	// compress worst case - random data of tags [0-F]
	uint8_t buf[C_MTE_SIZE] = {};
	random_tags_buf(buf, 16);
	uint8_t compressed[C_MTE_SIZE] = {};
	uint32_t compressed_size = 0;

	// warmup cache
	for (uint32_t i = 0; i < 50; ++i) {
		compressed_size = vm_mte_rle_compress_tags((uint8_t *) buf, C_MTE_SIZE, compressed, C_MTE_SIZE);
	}
	T_LOG("compressed_size=%u", compressed_size);

	uint64_t startns = clock_gettime_nsec_np(CLOCK_MONOTONIC);

	for (uint32_t i = 0; i < 300000; ++i) {
		compressed_size = vm_mte_rle_compress_tags((uint8_t *) buf, C_MTE_SIZE, compressed, C_MTE_SIZE);
	}
	uint64_t elapsed = clock_gettime_nsec_np(CLOCK_MONOTONIC) - startns;
	T_LOG("perf compress took: %llu msec", elapsed / NSEC_PER_MSEC);

	T_PASS("OK");
}

T_DECL(mte_decompress_tags_perf,
    "Test performance of MTE tags decompression",
    T_META_ENABLED(false))
{
	my_srand(0);
	uint8_t buf[C_MTE_SIZE] = {};
	random_tag_runs_buf(buf, 16, 4);

	uint8_t compressed[C_MTE_SIZE] = {};
	uint32_t compressed_size = vm_mte_rle_compress_tags((uint8_t *) buf, C_MTE_SIZE, compressed, C_MTE_SIZE);
	T_LOG("compressed_size=%u", compressed_size);
	// verify it's doing a decent amount of work
	T_QUIET; T_ASSERT_TRUE(compressed_size < C_MTE_SIZE && compressed_size > C_MTE_SIZE / 5 * 4, "compressed to unexpected size %u", compressed_size);

	uint8_t decompressed[C_MTE_SIZE] = {};
	bool ret = false;

	uint64_t startns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
	for (uint32_t i = 0; i < 300000; ++i) {
		ret = vm_mte_rle_decompress_tags(compressed, compressed_size, (uint8_t*)decompressed, C_MTE_SIZE);
	}

	uint64_t elapsed = clock_gettime_nsec_np(CLOCK_MONOTONIC) - startns;
	T_QUIET; T_ASSERT_TRUE(ret, "decompress failed");
	T_LOG("perf decompress took: %llu msec", elapsed / NSEC_PER_MSEC);

	T_PASS("OK");
}

/****************************************************************************************
 *  Active compressor test
 *  This test creates different patterns of tags and data, triggers a page-out
 *  to the compressor, waits for the page to be compressed, and then pages it back in
 */

#define countof(x) (sizeof(x) / sizeof(x[0]))

static void
zero_tags(uint8_t* buf, size_t bufsize)
{
	for (uint32_t offset = 0; offset < bufsize; offset += 16) {
		__arm_mte_set_tag(buf + offset);
	}
}

// state of a single use case, for convenient passing between functions
struct tag_pattern {
	uint8_t* buf_start;
	size_t buf_size;
	// a tagged pointer per every 16 bytes of the buffer
	uint8_t **tagged_ptrs;
	size_t ptrs_count;
	size_t ptrs_index;
};

static void
tag_pattern_init(struct tag_pattern *t, uint8_t *buf_start, size_t buf_size)
{
	t->buf_start = buf_start;
	t->buf_size = buf_size;
	t->ptrs_count = t->buf_size / VM_MEMTAG_BYTES_PER_TAG;
	T_LOG("  allocating %zu pointers", t->ptrs_count);
	t->tagged_ptrs = (uint8_t**)calloc(t->ptrs_count, sizeof(uint8_t *));
	t->ptrs_index = 0;
}

static void
tag_pattern_push_ptr(struct tag_pattern *t, uint8_t* tagged_ptr)
{
	T_QUIET; T_ASSERT_LT(t->ptrs_index, t->ptrs_count, "ptrs_index overflow"); // test sanity
	t->tagged_ptrs[t->ptrs_index++] = tagged_ptr;
}

static void
tag_pattern_destroy(struct tag_pattern *t)
{
	free(t->tagged_ptrs);
}

static uint8_t *
tag_pattern_get_ptr(struct tag_pattern *t, size_t offset)
{
	T_QUIET; T_ASSERT_LE(offset, t->buf_size, "offset overflow"); // test sanity
	uint8_t *chunk_p = t->tagged_ptrs[offset / VM_MEMTAG_BYTES_PER_TAG];
	if (chunk_p == NULL) {
		return t->buf_start + offset; // no tagged pointer filled in, return plain pointer
	}
	return chunk_p + (offset % VM_MEMTAG_BYTES_PER_TAG);
}

// verify the correctness of the data, using the tagged pointers where they are populated
static uint64_t
tag_pattern_read_verify(struct tag_pattern *t, const uint8_t* orig_data)
{
	uint64_t sum = 0;
	for (size_t offset = 0; offset < t->buf_size; ++offset) {
		uint8_t *tagged_ptr = tag_pattern_get_ptr(t, offset);
		uint8_t c = *tagged_ptr;
		sum += c;
		T_QUIET; T_ASSERT_EQ(c, orig_data[offset], "failed data comparison %zu : %d != %d", offset, (int)c, (int)orig_data[offset]);
	}
	return sum;
}

// SV (same-value) optimization that is guaranteed to end up in the hash
static void
fill_zeros(uint8_t *buf, size_t bufsize)
{
	// do nothing; the test function zeros the buffer after allocation
}

// SV optimization
static void
fill_same(uint8_t *buf, size_t bufsize)
{
	memset((void*)buf, 'A', bufsize);
}

static void
fill_only_first_byte(uint8_t *buf, size_t bufsize)
{
	buf[0] = 'A';
}

// should be nicely compressible by wkdm
static void
fill_counter(uint8_t *buf, size_t bufsize)
{
	uint32_t *ibuf = (uint32_t *)buf; // this cast is ok since buf has page alignment
	for (size_t i = 0; i < bufsize / sizeof(uint32_t); ++i) {
		ibuf[i] = 0x11111111 + (uint32_t)i;
	}
}

// should be incompressible by wkdm
static void
fill_rand(uint8_t *buf, size_t bufsize)
{
	for (size_t i = 0; i < bufsize; ++i) {
		buf[i] = my_rand() % 0xff;
	}
}

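// A note on the ACLE MTE intrinsics used by the tagging patterns below (summarizing
// their Arm ACLE documentation): __arm_mte_get_tag() loads the tag currently stored
// for an address (LDG), __arm_mte_exclude_tag() adds a pointer's tag to an exclusion
// mask (GMI), __arm_mte_create_random_tag() builds a pointer carrying a random tag
// outside that mask (IRG), and __arm_mte_set_tag() stores a pointer's tag to its
// 16-byte granule (STG).
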
// increments vm.tags_below_align
static void
tag_pattern_single_at_start(struct tag_pattern *t)
{
	uint8_t *buf = t->buf_start;
	uint8_t *orig_tagged_ptr = __arm_mte_get_tag(buf);
	uint64_t mask = __arm_mte_exclude_tag(orig_tagged_ptr, 0);
	uint8_t *tagged_buf = __arm_mte_create_random_tag(buf, mask);
	__arm_mte_set_tag(tagged_buf);
	tag_pattern_push_ptr(t, tagged_buf);
	// the rest remain NULL
}

// every consecutive 16-byte granule gets a different tag (worst case for the RLE algorithm)
// increments vm.tags_incompressible
static void
tag_pattern_max_mix(struct tag_pattern *t)
{
	uint8_t *prev_tagged_ptr = NULL;
	for (size_t offset = 0; offset < t->buf_size; offset += VM_MEMTAG_BYTES_PER_TAG) {
		uint8_t *ptr = t->buf_start + offset;
		uint8_t *orig_tagged_ptr = __arm_mte_get_tag(ptr);
		uint64_t mask = __arm_mte_exclude_tag(orig_tagged_ptr, 0);
		mask = __arm_mte_exclude_tag(prev_tagged_ptr, mask);  // don't want consecutive tags to be the same
		uint8_t *tagged_ptr = __arm_mte_create_random_tag(ptr, mask);
		__arm_mte_set_tag(tagged_ptr);
		tag_pattern_push_ptr(t, tagged_ptr);
		prev_tagged_ptr = tagged_ptr;
	}
	T_LOG("  got %zu pointers", t->ptrs_index);
}

static uint8_t *
tag_fill(struct tag_pattern *t, uint8_t* buf, size_t buf_size, uint8_t* prev_ptr)
{
	T_QUIET; T_ASSERT_EQ(buf_size % VM_MEMTAG_BYTES_PER_TAG, 0ul, "unexpected buf_size %zu", buf_size);
	uint8_t *orig_tagged_ptr = __arm_mte_get_tag(buf);
	uint64_t mask = __arm_mte_exclude_tag(orig_tagged_ptr, 0);
	mask = __arm_mte_exclude_tag(prev_ptr, mask); // new tag should be different from previous
	uint8_t *tagged_buf = __arm_mte_create_random_tag(buf, mask);
	uintptr_t only_tag = (uintptr_t)tagged_buf & TAG_MASK;

	for (size_t offset = 0; offset < buf_size; offset += VM_MEMTAG_BYTES_PER_TAG) {
		T_QUIET; T_ASSERT_LE(offset, t->buf_size, "fill_tag overflow"); // test sanity
		uint8_t *ptr = buf + offset;
		uint8_t *tagged_ptr = (uint8_t *)((uintptr_t)ptr | only_tag);
		__arm_mte_set_tag(tagged_ptr);
		tag_pattern_push_ptr(t, tagged_ptr);
	}
	return tagged_buf;
}

// the entire page has the same non-zero tag
static void
tag_pattern_all_same(struct tag_pattern *t)
{
	tag_fill(t, t->buf_start, t->buf_size, NULL);
}

static void
tag_pattern_all_zero(struct tag_pattern *t)
{
	// do nothing, all tags are initialized to zero by the test function
}

// increments vm.tags_below_align
static void
tag_pattern_half_and_half(struct tag_pattern *t)
{
	size_t sz = t->buf_size / 2;
	uint8_t *prev = tag_fill(t, t->buf_start, sz, NULL);
	tag_fill(t, t->buf_start + sz, sz, prev);
}

// increments vm.tags_above_align
static void
tag_pattern_odd_chunks(struct tag_pattern *t)
{
	size_t sizes[] = {31, 31, 63, 63, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31}; // should sum to less than 1024
	size_t offset = 0;
	uint8_t *prev = NULL;
	for (size_t i = 0; i < countof(sizes); ++i) {
		size_t sz = sizes[i] * VM_MEMTAG_BYTES_PER_TAG;
		prev = tag_fill(t, t->buf_start + offset, sz, prev);
		offset += sz;
	}
	T_LOG("  reached offset %zu, got %zu pointers", offset, t->ptrs_index);
}

// --- the following functions use the compressor sysctls to track that things are progressing as expected ---

// keeps the state of the compressor sysctls
struct tags_sysctls {
	uint64_t pages_compressed;

	uint64_t all_zero;
	uint64_t same_value;
	uint64_t below_align;
	uint64_t above_align;
	uint64_t incompressible;

	uint64_t pages_decompressed;
	uint64_t pages_freed;
	uint64_t pages_corrupted;

	int64_t overhead_bytes; // can be negative on diffs
	int64_t start_overhead_bytes; // for comparing the very start to the end
	int64_t tagged_pages;

	// unrelated to tagging, but interesting to see as well
	uint64_t wk_compressions;
};

// sample all the sysctls we're interested in
static void
tags_sysctls_sample(struct tags_sysctls *s)
{
	s->pages_compressed = sysctl_get_Q("vm.mte.compress_pages_compressed");
#if DEVELOPMENT || DEBUG
	s->all_zero = sysctl_get_Q("vm.mte.compress_all_zero");
	s->same_value = sysctl_get_Q("vm.mte.compress_same_value");
	s->below_align = sysctl_get_Q("vm.mte.compress_below_align");
	s->above_align = sysctl_get_Q("vm.mte.compress_above_align");
	s->incompressible = sysctl_get_Q("vm.mte.compress_incompressible");
#endif /* DEVELOPMENT || DEBUG */
	s->pages_decompressed = sysctl_get_Q("vm.mte.compress_pages_decompressed");
	s->pages_freed = sysctl_get_Q("vm.mte.compress_pages_freed");
	s->pages_corrupted = sysctl_get_Q("vm.mte.compress_pages_corrupted");
	s->overhead_bytes = (int64_t)sysctl_get_Q("vm.mte.compress_overhead_bytes");
	s->tagged_pages = (int64_t)sysctl_get_Q("vm.mte.compress_pages");
	s->wk_compressions = sysctl_get_Q("vm.wk_compressions");
}

static void
tags_sysctl_update(struct tags_sysctls *s, struct tags_sysctls *sample)
{ // update the sysctl state with the latest sample but preserve start_overhead_bytes
	int64_t start_bytes = s->start_overhead_bytes;
	*s = *sample;
	s->start_overhead_bytes = start_bytes;
}

// sample and diff with the previous sample
static void
tags_sysctls_sample_diff(const struct tags_sysctls *start, struct tags_sysctls *sample, struct tags_sysctls *d)
{
	tags_sysctls_sample(sample);
#define SUB_FIELD(field) d->field = sample->field - start->field
	SUB_FIELD(pages_compressed);
	SUB_FIELD(all_zero);
	SUB_FIELD(same_value);
	SUB_FIELD(below_align);
	SUB_FIELD(above_align);
	SUB_FIELD(incompressible);
	SUB_FIELD(pages_decompressed);
	SUB_FIELD(pages_freed);
	SUB_FIELD(pages_corrupted);
	SUB_FIELD(overhead_bytes);
	SUB_FIELD(tagged_pages);
	SUB_FIELD(wk_compressions);
#undef SUB_FIELD
}

static void
tags_sysctls_print(const struct tags_sysctls *s, const char* desc)
{
	T_LOG("  %s  comp: %llu | zero: %llu  same: %llu  below: %llu  above: %llu  incomp: %llu | decomp:%llu  freed:%llu  corrupt:%llu | bytes:%lld  pages:%lld | wk_comp:%llu",
	    desc, s->pages_compressed, s->all_zero, s->same_value, s->below_align, s->above_align, s->incompressible,
	    s->pages_decompressed, s->pages_freed, s->pages_corrupted, s->overhead_bytes, s->tagged_pages, s->wk_compressions);
}

// called before the compressor work
static void
tags_sysctl_start(struct tags_sysctls *s)
{
	tags_sysctls_sample(s);
	s->start_overhead_bytes = s->overhead_bytes;
	tags_sysctls_print(s, "START ");
}

#define SYSCTL_ALL_ZERO 1
#define SYSCTL_SAME_VALUE 2
#define SYSCTL_BELOW_ALIGN 3
#define SYSCTL_ABOVE_ALIGN 4
#define SYSCTL_INCOMPRESSIBLE 5

// Uncomment this to make the asserts on the incremented statistics require exactly the expected value.
// This assumes the tester is the only non-idle MTE-enabled process.
// That is undesirable if there are other MTE-enabled processes which might page-out to the compressor
// while the test is running, which is the case in BATS.
// #define STRICT_STATS_EQ

#ifdef STRICT_STATS_EQ
#define ASSERT_STAT_ATLEAST T_ASSERT_EQ
#else
#define ASSERT_STAT_ATLEAST T_ASSERT_GE
#endif

static void
wait_compressed(struct tags_sysctls* start, uint32_t expected_increment)
{
	// start with a sleep to give it a first chance to settle
	usleep(10000);
	struct tags_sysctls sample, d;
	int iter = 1; // on account of the usleep above
	while (true) {
		tags_sysctls_sample_diff(start, &sample, &d);
		if (d.pages_compressed > 0) {
			T_QUIET; ASSERT_STAT_ATLEAST(d.pages_compressed, 1ull, "compressed more than 1 page, are you running something in parallel?");
			break;
		}
		usleep(10000);
		++iter;
		if (iter > 10) {
			T_ASSERT_FAIL("waiting too long for page-out. is MTE in the compressor enabled?");
			break;
		}
	}
	T_LOG("  waited for tags compression after %d msec", iter * 10);
	tags_sysctls_print(&d, "WAITED");

	// check the expected sysctl was incremented
#define CHECK_INC(field_name, field_num) \
	T_QUIET; ASSERT_STAT_ATLEAST(d.field_name, (expected_increment == field_num) ? 1ull : 0ull, "unexpected increment value")
	CHECK_INC(all_zero, SYSCTL_ALL_ZERO);
	CHECK_INC(same_value, SYSCTL_SAME_VALUE);
	CHECK_INC(below_align, SYSCTL_BELOW_ALIGN);
	CHECK_INC(above_align, SYSCTL_ABOVE_ALIGN);
	CHECK_INC(incompressible, SYSCTL_INCOMPRESSIBLE);
#undef CHECK_INC
	tags_sysctl_update(start, &sample); // reset it for the next check
}

static void
check_sysctls_after_pagein(struct tags_sysctls* start)
{
	struct tags_sysctls sample, d;
	tags_sysctls_sample_diff(start, &sample, &d);
	tags_sysctls_print(&d, "PAGEIN");

	T_QUIET; ASSERT_STAT_ATLEAST(d.pages_decompressed, 1ull, "check counter");
	T_QUIET; ASSERT_STAT_ATLEAST(d.pages_freed, 0ull, "check counter");
	T_QUIET; ASSERT_STAT_ATLEAST(d.pages_corrupted, 0ull, "check counter");
	// after page-in overhead returns to 0
	T_QUIET; ASSERT_STAT_ATLEAST(start->start_overhead_bytes - sample.overhead_bytes, 0ll, "check overhead bytes");
	tags_sysctl_update(start, &sample);
}

static void
check_sysctls_after_dealloc(struct tags_sysctls* start, bool did_pagein)
{
	struct tags_sysctls sample, d;
	tags_sysctls_sample_diff(start, &sample, &d);
	tags_sysctls_print(&d, "DEALLOC");

	if (!did_pagein) {
		T_QUIET; ASSERT_STAT_ATLEAST(d.pages_freed, 1ull, "check counter");
	} else {
		T_QUIET; ASSERT_STAT_ATLEAST(d.pages_freed, 0ull, "check counter");
	}
	T_QUIET; ASSERT_STAT_ATLEAST(d.pages_decompressed, 0ull, "check counter");
	T_QUIET; ASSERT_STAT_ATLEAST(d.pages_corrupted, 0ull, "check counter");
	T_QUIET; ASSERT_STAT_ATLEAST(start->start_overhead_bytes - sample.overhead_bytes, 0ll, "check overhead bytes");
	tags_sysctl_update(start, &sample);
}

// --- main test function ---

typedef void (*fn_fill)(uint8_t *buf, size_t bufsize);
typedef void (*fn_do_tags)(struct tag_pattern *t);

struct tags_fill_t {
	fn_do_tags do_tags_func;
	const char *name;
	uint32_t expect_sysctl_increment;
};

struct data_fill_t {
	fn_fill fill_func;
	const char *name;
};

#define WAIT_INTERACTIVE 1
#define DONT_PAGEIN 2
#define PRELOAD_COMPRESSED_BYTES 4
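// WAIT_INTERACTIVE: pause for a keypress after the page-out (handy for on-desk inspection);
// DONT_PAGEIN: deallocate the buffer while it is still compressed;
// PRELOAD_COMPRESSED_BYTES: keep an extra tagged page in the compressor for the whole run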

static void
test_pattern(struct data_fill_t data_fill, struct tags_fill_t tags_fill, uint32_t flags)
{
	T_LOG("---------- Running: fill:%s tags:%s... ----------", data_fill.name, tags_fill.name);
	size_t bufsize = PAGE_SIZE;
	vm_address_t address = 0;
	kern_return_t kr = vm_allocate(mach_task_self(), &address, bufsize, VM_FLAGS_ANYWHERE | VM_FLAGS_MTE);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_allocate(VM_FLAGS_MTE)");

	uint8_t *buf = (uint8_t*)address;
	uint8_t *copy_buf = (uint8_t *)malloc(bufsize); // will hold a copy of the data for comparing after page-in

	memset((void*)buf, 0, bufsize);
	zero_tags(buf, bufsize);

	// fill page with data
	data_fill.fill_func(buf, bufsize);
	memcpy(copy_buf, buf, bufsize); // make a copy for later comparison

	struct tag_pattern t;
	tag_pattern_init(&t, buf, bufsize);
	T_LOG("  tagging");
	tags_fill.do_tags_func(&t);
	T_LOG("    verify-read");
	// verify we can indeed read all tags
	tag_pattern_read_verify(&t, copy_buf);

	struct tags_sysctls ts; // updated with the latest sysctl sample after each phase
	tags_sysctl_start(&ts);

	T_LOG("  paging-out");
	kr = mach_vm_behavior_set(mach_task_self(), (mach_vm_address_t)buf, bufsize, VM_BEHAVIOR_PAGEOUT);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "failed mach_vm_behavior_set() %p,%zu - %d", buf, bufsize, kr);

	wait_compressed(&ts, tags_fill.expect_sysctl_increment);

	if (flags & WAIT_INTERACTIVE) {
		getchar();
	}

	if (!(flags & DONT_PAGEIN)) {
		T_LOG("  paging-in");
		tag_pattern_read_verify(&t, copy_buf);
		check_sysctls_after_pagein(&ts);
	}
	T_LOG("  deallocating");
	kr = vm_deallocate(mach_task_self(), address, bufsize);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_deallocate");

	check_sysctls_after_dealloc(&ts, !(flags & DONT_PAGEIN));

	tag_pattern_destroy(&t);
	free(copy_buf);
	T_PASS("OK");
}

struct test_buf {
	vm_address_t address;
	size_t bufsize;
};
// this is just a simpler version of the above, split into two functions, so that there is already
// something in the compressor while the test is running
static void
preload_compressed_bytes(struct test_buf *b)
{
	T_LOG("---- preloading the compressor ----");
	size_t bufsize = PAGE_SIZE;
	vm_address_t address = 0;
	kern_return_t kr = vm_allocate(mach_task_self(), &address, bufsize, VM_FLAGS_ANYWHERE | VM_FLAGS_MTE);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_allocate(VM_FLAGS_MTE)");

	uint8_t *buf = (uint8_t*)address;
	memset((void*)buf, 0, bufsize);
	zero_tags(buf, bufsize);

	// set non-zero tags
	struct tag_pattern t;
	tag_pattern_init(&t, buf, bufsize);
	tag_pattern_max_mix(&t);
	tag_pattern_destroy(&t); // don't need to verify it later

	struct tags_sysctls ts;
	tags_sysctl_start(&ts);

	T_LOG("  paging-out (preload)");
	kr = mach_vm_behavior_set(mach_task_self(), (mach_vm_address_t)buf, bufsize, VM_BEHAVIOR_PAGEOUT);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "failed mach_vm_behavior_set() %p,%zu - %d", buf, bufsize, kr);

	wait_compressed(&ts, SYSCTL_INCOMPRESSIBLE);
	b->address = address;
	b->bufsize = bufsize;
}

static void
un_preload_compressed_bytes(struct test_buf *b)
{
	T_LOG("---- un-preloading ----");
	kern_return_t kr = vm_deallocate(mach_task_self(), b->address, b->bufsize);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_deallocate");
}

static struct tags_fill_t tags_fills[] = {
	{ &tag_pattern_single_at_start, "single_at_start", SYSCTL_BELOW_ALIGN },
	{ &tag_pattern_max_mix, "max-mix", SYSCTL_INCOMPRESSIBLE },
	{ &tag_pattern_all_zero, "all-zero", SYSCTL_ALL_ZERO },
	{ &tag_pattern_all_same, "all-same", SYSCTL_SAME_VALUE },
	{ &tag_pattern_half_and_half, "halfs", SYSCTL_BELOW_ALIGN },
	{ &tag_pattern_odd_chunks, "odd-chunks", SYSCTL_ABOVE_ALIGN }
};

static struct data_fill_t data_fills[] = {
	{ &fill_zeros, "zeros" },
	{ &fill_same, "same" },
	{ &fill_only_first_byte, "first-byte" },
	{ &fill_counter, "counter" },
	{ &fill_rand, "rand" }
};

void
run_all_patterns(int flags)
{
	my_srand(0);
	struct test_buf b;
	if (flags & PRELOAD_COMPRESSED_BYTES) {
		preload_compressed_bytes(&b);
	}
	for (size_t fpi = 0; fpi < countof(data_fills); ++fpi) {
		for (size_t tpi = 0; tpi < countof(tags_fills); ++tpi) {
			test_pattern(data_fills[fpi], tags_fills[tpi], flags);
		}
	}
	if (flags & PRELOAD_COMPRESSED_BYTES) {
		un_preload_compressed_bytes(&b);
	}
}

T_DECL(mte_compressor_paging,
    "Test paging out to the compressor and paging in from the compressor of MTE pages",
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm.FEAT_MTE2", 1),
    XNU_T_META_SOC_SPECIFIC)
{
	run_all_patterns(0);
	run_all_patterns(PRELOAD_COMPRESSED_BYTES);
}

T_DECL(mte_compressor_no_pageing,
    "Test what happens if the tagged memory is not paged-in before being deallocated",
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm.FEAT_MTE2", 1),
    XNU_T_META_SOC_SPECIFIC)
{
	run_all_patterns(DONT_PAGEIN);
	run_all_patterns(DONT_PAGEIN | PRELOAD_COMPRESSED_BYTES);
}

static size_t
read_big_sysctl(const char *name, char **buf)
{
	size_t len = 0;
	int rc = sysctlbyname(name, NULL, &len, NULL, 0); // get the length of the needed buffer
	T_ASSERT_POSIX_SUCCESS(rc, "query size of sysctl `%s`", name);
	T_ASSERT_GT(len, (size_t)0, "sysctl got size 0");
	len += 4096; // allocate a bit extra in case the size changed between the two calls
	*buf = (char*)malloc(len);
	T_ASSERT_NE_PTR((void*)*buf, NULL, "allocation for sysctl %zu", len);
	rc = sysctlbyname(name, *buf, &len, NULL, 0);
	T_ASSERT_POSIX_SUCCESS(rc, "query of sysctl `%s`", name);
	return len;
}

//#define CSEGS_VERBOSE
#ifdef CSEGS_VERBOSE
#define T_LOG_VERBOSE(...) T_LOG(__VA_ARGS__)
#else
#define T_LOG_VERBOSE(...)
#endif

// this uses the sysctl that dumps all the compressor metadata to calculate the MTE bytes overhead
static void
get_mte_size_from_csegs(uint64_t *bytes_overhead, uint64_t *tagged_pages)
{
	uint64_t compressed_bytes = 0; // before alignment
	*bytes_overhead = 0;
	*tagged_pages = 0;

	char *buf = NULL;
	size_t sz = read_big_sysctl("vm.compressor_segments", &buf);

	size_t offset = 0;
	T_QUIET; T_ASSERT_GE_ULONG(sz, sizeof(uint32_t), "got buffer shorter than the magic value");
	uint32_t hdr_magic = *((uint32_t*)buf);
	T_ASSERT_EQ_UINT(hdr_magic, VM_C_SEGMENT_INFO_MAGIC, "match magic value");
	offset += sizeof(uint32_t);
	while (offset < sz) {
		// read next c_segment
		T_QUIET; T_ASSERT_LE(offset + sizeof(struct c_segment_info), sz, "unexpected offset for c_segment_info");
		const struct c_segment_info* cseg = (const struct c_segment_info*)(buf + offset);
		offset += sizeof(struct c_segment_info);
		// read its slots
		bool logged_segment = false;
		T_QUIET; T_ASSERT_LE(offset + cseg->csi_slots_len * sizeof(struct c_slot_info), sz, "unexpected offset for c_slot_info");
		for (int i = 0; i < cseg->csi_slots_len; ++i) {
			const struct c_slot_info *slot = (const struct c_slot_info*)&cseg->csi_slots[i];
			if (slot->csi_mte_size == 0) {
				continue;
			}
			++(*tagged_pages);
			uint32_t actual_size = vm_mte_compressed_tags_actual_size(slot->csi_mte_size);
			if (actual_size > 0) {
				compressed_bytes += slot->csi_mte_size;
				*bytes_overhead += C_SEG_ROUND_TO_ALIGNMENT(slot->csi_mte_size);
			}
			T_QUIET; T_ASSERT_FALSE(slot->csi_mte_has_data, "unexpected has_data");

			if (!logged_segment) {
				T_LOG_VERBOSE("segment %u  bytes-used: %d", cseg->csi_mysegno, cseg->csi_bytes_used);
				logged_segment = true;
			}
			T_LOG_VERBOSE("   slot %d: size=%u  mte_size=%u", i, (uint32_t) slot->csi_size, (uint32_t) slot->csi_mte_size);
		}
		offset += cseg->csi_slots_len * sizeof(struct c_slot_info);
	}
	free(buf);
	T_LOG("compressed_bytes=%llu  aligned=%llu  tagged_pages=%llu", compressed_bytes, *bytes_overhead, *tagged_pages);
}

static void
counters_verify()
{
	// this comparison may fail since it is inherently racy: the two numbers are sampled at slightly different times.
	T_MAYFAIL;
	uint64_t bytes_from_csegs = 0, pages_from_csegs = 0;
	get_mte_size_from_csegs(&bytes_from_csegs, &pages_from_csegs);
	uint64_t bytes_from_sysctl = sysctl_get_Q("vm.mte.compress_overhead_bytes");
	uint64_t pages_from_sysctl = sysctl_get_Q("vm.mte.compress_pages");
	T_ASSERT_EQ(bytes_from_csegs, bytes_from_sysctl, "overhead bytes count match");
	T_ASSERT_EQ(pages_from_csegs, pages_from_sysctl, "tagged pages count match");
}

static vm_address_t
make_rand_tagged_buf(size_t bufsize)
{
	my_srand(0);
	T_LOG("filling buffer size 0x%zx", bufsize);
	vm_address_t address;
	kern_return_t kr = vm_allocate(mach_task_self(), &address, bufsize, VM_FLAGS_ANYWHERE | VM_FLAGS_MTE);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_allocate(VM_FLAGS_MTE)");

	uint8_t *buf = (uint8_t*)address;
	memset((void*)buf, 0, bufsize);
	zero_tags(buf, bufsize);

	// fill each page with different fill and tag patterns
	for (int i = 0; i < bufsize / PAGE_SIZE; ++i) {
		struct tag_pattern t;
		uint8_t *it_buf = buf + i * PAGE_SIZE;
		size_t it_size = PAGE_SIZE;
		tag_pattern_init(&t, it_buf, it_size);

		struct data_fill_t *df = &data_fills[(my_rand() >> 1) % countof(data_fills)];
		df->fill_func(it_buf, it_size);
		int tf_ind = (my_rand() >> 1) % countof(tags_fills);
		struct tags_fill_t *tf = &tags_fills[tf_ind];
		tf->do_tags_func(&t);

		tag_pattern_destroy(&t);
	}
	return address;
}

static void
page_out(vm_address_t address, size_t bufsize)
{
	T_LOG("paging-out");
	kern_return_t kr = mach_vm_behavior_set(mach_task_self(), (mach_vm_address_t)address, bufsize, VM_BEHAVIOR_PAGEOUT);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "failed mach_vm_behavior_set() %lx,%zu - %d", address, bufsize, kr);
}

static void
print_stats()
{
	struct tags_sysctls ts;
	tags_sysctls_sample(&ts);
	tags_sysctls_print(&ts, "STATS");
}

static void
dealloc(vm_address_t address, size_t bufsize)
{
	T_LOG("  deallocating");
	kern_return_t kr = vm_deallocate(mach_task_self(), address, bufsize);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_deallocate");
}

// This test is useful to run after some heavy MTE processes have run and finished,
// to make sure that the byte count maintained in the sysctl matches the actual mte_sizes in the segments
T_DECL(mte_compressor_counters_verify,
    "Verify that the overhead bytes statistics match the size as it appears in the segments",
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm.FEAT_MTE2", 1),
    XNU_T_META_SOC_SPECIFIC)
{
	counters_verify();
	print_stats();
}


T_DECL(mte_compressor_exercise_counters_verify,
    "Exercise the MTE tags compression, then verify that the overhead bytes statistics match the size as it appears in the segments",
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm.FEAT_MTE2", 1),
    XNU_T_META_SOC_SPECIFIC)
{
	size_t bufsize = 100 * PAGE_SIZE;
	vm_address_t address = make_rand_tagged_buf(bufsize);
	page_out(address, bufsize);
	usleep(20000); // wait for the compressor to finish
	counters_verify();
	print_stats();
	dealloc(address, bufsize);
}

static void
dump_buffer(const char *path, const char *buf, size_t sz)
{
	FILE *f = fopen(path, "w");
	T_QUIET; T_ASSERT_NOTNULL(f, "Failed to open file %s", path);
	T_QUIET; T_ASSERT_EQ(fwrite(buf, 1, sz, f), sz, "Failed to write to file %s", path);
	T_QUIET; T_ASSERT_EQ(fclose(f), 0, "Failed to close file %s", path);
}

static size_t
read_file(const char *path, char **buf_ptr)
{
	FILE *f = fopen(path, "r");
	T_QUIET; T_ASSERT_NOTNULL(f, "Failed to open file %s", path);
	T_QUIET; T_ASSERT_EQ(fseek(f, 0, SEEK_END), 0, "Failed to seek in file %s", path);
	size_t sz = ftell(f);
	T_QUIET; T_ASSERT_GT(sz, (size_t)0, "Empty file %s", path);
	T_QUIET; T_ASSERT_EQ(fseek(f, 0, SEEK_SET), 0, "Failed to seek in file %s", path);
	*buf_ptr = (char *)malloc(sz);
	T_QUIET; T_ASSERT_EQ(fread(*buf_ptr, 1, sz, f), sz, "Failed to read from file %s", path);
	T_QUIET; T_ASSERT_EQ(fclose(f), 0, "Failed to close file %s", path);
	return sz;
}

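// Layout of the blob parsed below: a uint32_t magic, then for each segment a
// struct c_segment_info followed by csi_slots_len struct c_slot_info records,
// where each slot that carries compressed tag data is immediately followed by
// its actual_size bytes of that data.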
static void
get_mte_compressed_tags(
	void (^process_cseg)(uint32_t state),
	void (^process_cslot)(int slot_idx, uint8_t *compressed_buf, uint32_t compressed_size, uint32_t actual_size),
	const char *load_from_file, const char *dump_to_file)
{
	char *buf = NULL;
	size_t sz = 0;
	if (load_from_file == NULL) {
		// if this buffer gets big reading it may fail when under memory pressure since it requires a big
		// memory allocation in the kernel
		T_MAYFAIL;
		sz = read_big_sysctl("vm.compressor_segments_data", &buf);
	} else {
		sz = read_file(load_from_file, &buf);
	}
	if (dump_to_file != NULL) {
		dump_buffer(dump_to_file, buf, sz);
	}

	size_t offset = 0;
	T_QUIET; T_ASSERT_GE_ULONG(sz, sizeof(uint32_t), "got buffer shorter than the magic value");
	uint32_t hdr_magic = *((uint32_t*)buf);
	T_ASSERT_EQ_UINT(hdr_magic, VM_C_SEGMENT_INFO_MAGIC_WITH_TAGS, "match magic value");
	offset += sizeof(uint32_t);
	while (offset < sz) {
		// read next c_segment
		T_QUIET; T_ASSERT_LE(offset + sizeof(struct c_segment_info), sz, "unexpected offset for c_segment_info");
		const struct c_segment_info* cseg = (const struct c_segment_info*)(buf + offset);
		process_cseg(cseg->csi_state);
		offset += sizeof(struct c_segment_info);
		// read its slots
		for (int si = 0; si < cseg->csi_slots_len; ++si) {
			T_QUIET; T_ASSERT_LE(offset + sizeof(struct c_slot_info), sz, "unexpected offset for c_slot_info");
			const struct c_slot_info *slot = (const struct c_slot_info*)(buf + offset);
			offset += sizeof(struct c_slot_info);
			if (slot->csi_mte_size == 0 || !slot->csi_mte_has_data) {
				continue;
			}
			uint32_t actual_size = vm_mte_compressed_tags_actual_size(slot->csi_mte_size);
			uint8_t *data_ptr = NULL;
			if (actual_size > 0) {
				T_QUIET; T_ASSERT_LE(offset + actual_size, sz, "unexpected offset for tags data");
				// compressed tag data is at the end of the c_slot_info
				data_ptr = (uint8_t *)slot + sizeof(struct c_slot_info);
			}
			process_cslot(si, data_ptr, slot->csi_mte_size, actual_size);
			offset += actual_size;
		}
	}
	free(buf);
}

static void
print_comp_hist(struct comp_histogram *comp_hist)
{
	T_LOG("RLE cmd histogram:");
	for (int i = 0; i < countof(comp_hist->cmd_bins); ++i) {
		T_LOG("|  %x,  %llu", i, comp_hist->cmd_bins[i]);
	}
	T_LOG("Total: %llu cmds", comp_hist->cmd_total);
	T_LOG("Compressed size histogram:");
	T_LOG("|  sv,  %llu", comp_hist->same_value_count);
	for (int i = 0; i < countof(comp_hist->comp_size_bins); ++i) {
		T_LOG("|  %d,  %llu", (i + 1) * C_SEG_OFFSET_ALIGNMENT_BOUNDARY, comp_hist->comp_size_bins[i]);
	}
}

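// one histogram bin per compressor segment state (c_state), plus an overflow bin for
// any state value beyond the C_STATE_COUNT this test was built against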
#define C_STATE_COUNT 11

struct cseg_histogram {
	uint64_t csegs_per_state[C_STATE_COUNT + 1];
};

static void
analyse_rle_runs(const char* load_from_file, const char* dump_to_file, bool show_lens, bool show_recompress, bool show_cseg_state)
{
	struct comp_histogram comp_hist = {}, *comp_hist_ptr = &comp_hist;
	struct runs_histogram run_hist = {}, *run_hist_ptr = &run_hist;
	struct comp_histogram re_comp_hist = {}, *re_comp_hist_ptr = &re_comp_hist;
	struct cseg_histogram cseg_hist = {}, *cseg_hist_ptr = &cseg_hist;
	get_mte_compressed_tags(
		^void (uint32_t cseg_state) {
		cseg_hist_ptr->csegs_per_state[MIN(cseg_state, C_STATE_COUNT)]++;
	},
		^void (int slot_idx, uint8_t *compressed_buf, uint32_t compressed_size, uint32_t actual_size) {
		T_LOG_VERBOSE("    got compressed %d: %u(%x) bytes actual=%d", slot_idx, compressed_size, compressed_size, actual_size);
		// first verify that it decompresses to the correct size
		uint8_t decompressed[C_MTE_SIZE] = {};
		bool ret = vm_mte_rle_decompress_tags(compressed_buf, compressed_size, (uint8_t*)decompressed, C_MTE_SIZE);
		T_QUIET; T_ASSERT_TRUE(ret, "decompress failed");

		ret = vm_mte_rle_comp_histogram(compressed_buf, compressed_size, comp_hist_ptr);
		T_QUIET; T_ASSERT_TRUE(ret, "vm_mte_rle_cmd_histogram");
		vm_mte_rle_runs_histogram(decompressed, C_MTE_SIZE, run_hist_ptr);

		uint8_t re_compressed[C_MTE_SIZE] = {};
		uint32_t re_compress_sz = vm_mte_rle_compress_tags(decompressed, C_MTE_SIZE, re_compressed, C_MTE_SIZE);
		ret = vm_mte_rle_comp_histogram(re_compressed, re_compress_sz, re_comp_hist_ptr);
		T_QUIET; T_ASSERT_TRUE(ret, "re-vm_mte_rle_cmd_histogram");
	}, load_from_file, dump_to_file);

	print_comp_hist(&comp_hist);

	if (show_lens) {
		T_LOG("RLE run lengths histogram:");
		for (int i = 0; i < countof(run_hist.rh_bins); ++i) {
			T_LOG("|  %d,  %llu", i, run_hist.rh_bins[i]);
		}
	}
	if (show_recompress) {
		T_LOG("*** recompressed ***");
		print_comp_hist(&re_comp_hist);
	}
	if (show_cseg_state) {
		T_LOG("cseg-state histogram:");
		for (int i = 0; i < C_STATE_COUNT + 1; ++i) {
			T_LOG("|  %d,  %llu", i, cseg_hist.csegs_per_state[i]);
		}
	}
}

T_DECL(mte_compressor_analyze_rle,
    "Read, verify, and print the RLE command stats and the run stats of the MTE tags in the compressor",
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm.FEAT_MTE2", 1),
    XNU_T_META_SOC_SPECIFIC)
{
	const char *dump_to_file = NULL, *load_from_file = NULL;
	bool show_lens = false, show_recompress = false, show_state = false;
	for (int i = 0; i < argc; ++i) {
		if (strcmp(argv[i], "--in") == 0) {
			load_from_file = argv[++i];
			T_LOG("Loading data from `%s`", load_from_file);
		} else if (strcmp(argv[i], "--out") == 0) {
			dump_to_file = argv[++i];
			T_LOG("Dumping data to `%s`", dump_to_file);
		} else if (strcmp(argv[i], "--show-lens") == 0) {
			show_lens = true;
		} else if (strcmp(argv[i], "--recompress") == 0) {
			// this option allows testing new changes in the compression algorithm compared to
			// what's loaded from the input file/sysctl
			show_recompress = true;
		} else if (strcmp(argv[i], "--state") == 0) {
			// useful for stats on how many segments are in the swap
			show_state = true;
		}
	}
	analyse_rle_runs(load_from_file, dump_to_file, show_lens, show_recompress, show_state);
	if (load_from_file) {
		return; // don't want to print irrelevant stats when processing data from file
	}
	print_stats();
}

T_DECL(mte_compressor_exercise_analyze_rle,
    "Exercise the MTE tags compression, then read, verify, and print the RLE command stats and the run stats",
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm.FEAT_MTE2", 1),
    XNU_T_META_SOC_SPECIFIC)
{
	size_t bufsize = 100 * PAGE_SIZE;
	vm_address_t address = make_rand_tagged_buf(bufsize);
	page_out(address, bufsize);
	usleep(20000); // wait for the compressor to finish
	analyse_rle_runs(NULL, NULL, false, false, false);
	print_stats();
	dealloc(address, bufsize);
}