/*
 * Copyright (c) 2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _ARM64_MTE_H_
#define _ARM64_MTE_H_

#include <sys/types.h>

#if XNU_KERNEL_PRIVATE
#include <vm/vm_memtag.h>
#if DEVELOPMENT || DEBUG
extern void mte_validate_tco_state(void);
#endif /* DEVELOPMENT || DEBUG */
#else /* XNU_KERNEL_PRIVATE */
#include <assert.h>
#include <strings.h>
#define mte_validate_tco_state()        do { } while(0)
#endif /* XNU_KERNEL_PRIVATE */

#include <arm_acle.h>

__BEGIN_DECLS

/**
 * The interfaces provided here rely on the MTE ISA being available at compile
 * time, and on MTE being enabled for the executing process at runtime.
 * It is the responsibility of clients of this API to check both conditions
 * to ensure correct behaviour.
 */
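
/*
 * A minimal sketch of the compile-time half of that check, assuming an
 * ACLE-conformant compiler: Clang predefines __ARM_FEATURE_MEMORY_TAGGING
 * when the MTE intrinsics in <arm_acle.h> are available (e.g. when built
 * with -march=armv8.5-a+memtag). The runtime half remains the caller's
 * responsibility.
 *
 *   #if defined(__ARM_FEATURE_MEMORY_TAGGING)
 *   #include <arm64/mte.h>
 *   // use the mte_* helpers, after verifying MTE is enabled at runtime
 *   #endif
 */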

/**
 * @typedef mte_exclude_mask_t
 *
 * @abstract Represents an MTE tag exclusion mask, used in tag generation.
 */
typedef uint64_t mte_exclude_mask_t;

#define MTE_TAG_SPAN_SIZE              (16)
#define MTE_TAGS_PER_SIZE(s)           (roundup(s, MTE_TAG_SPAN_SIZE) / MTE_TAG_SPAN_SIZE)
#define MTE_SIZE_TO_ATAG_STORAGE(s)    (MTE_TAGS_PER_SIZE(s) / 2)
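
/*
 * Worked example: a 48-byte buffer covers MTE_TAGS_PER_SIZE(48) == 3 tag
 * granules of MTE_TAG_SPAN_SIZE bytes each. Allocation tags are four bits
 * wide (two per byte of tag storage), so MTE_SIZE_TO_ATAG_STORAGE(48) == 1
 * byte (integer division).
 */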

/*
 * Helpers for MTE intrinsics.
 * Clang provides a basic set of MTE intrinsics that does not cover the
 * full set of ISA instructions. We provide the complete set here,
 * prefixed with mte_.
 */

#pragma mark Tag store operations

/*!
 * @function mte_store_tag_16()
 *
 * @brief
 * Sets the tag for the 16-byte memory span starting at the given address.
 *
 * @discussion
 * This function wraps the ARM __arm_mte_set_tag intrinsic.
 *
 * @param addr  The starting address of the 16-byte span to tag.
 */
static inline void
mte_store_tag_16(void *addr)
{
	__arm_mte_set_tag(addr);
}

/*!
 * @function mte_store_tag_32()
 *
 * @brief
 * Sets the tag for the 32-byte memory span starting at the given address.
 *
 * @discussion
 * This function wraps the ARM ST2G instruction.
 *
 * @param addr  The starting address of the 32-byte span to tag.
 */
static inline void
mte_store_tag_32(void *addr)
{
	__asm__ __volatile__ ("st2g %0, [%0]" : : "r" (addr) : "memory");
}

/*!
 * @function mte_store_tag_64()
 *
 * @brief
 * Sets the tag for the 64-byte memory span starting at the given address.
 *
 * @discussion
 * This function wraps the ARM DC GVA instruction.
 *
 * @param addr  The starting address of the 64-byte span to tag.
 */
static inline void
mte_store_tag_64(void *addr)
{
	__asm__ __volatile__ ("dc gva, %0" : : "r" (addr) : "memory");
}

/*!
 * @function mte_store_tag_small()
 *
 * @brief
 * Sets the tag across a contiguous memory buffer smaller than 64 bytes.
 *
 * @warning
 * This function is used by mte_store_tag(), and clients should refrain from
 * calling it directly. The function explicitly asserts its preconditions on
 * the size of the buffer.
 *
 * @param start The starting address of the buffer.
 * @param end   The end of the buffer.
 */
static inline void
mte_store_tag_small(uintptr_t start, uintptr_t end)
{
	/* Optimize STG/ST2G for sub-64 byte sizes. start and end must be 16-byte aligned. */
	size_t size = end - start;
	assert(size < 64 && size % 16 == 0);

	if (size <= 16) {
		__asm__ __volatile__ ("stg %0, [%0], #16" : "+r" (start) : : "memory");
		return;
	}

	/* At least 32 bytes need to be written */
	__asm__ __volatile__ ("st2g %0, [%0], #32" : "+r" (start) : : "memory");

	/* Tag the last 16 bytes */
	end -= 16;
	__asm__ __volatile__ ("stg %0, [%0], #16" : "+r" (end) : : "memory");
}

/*
 * When setting tags, consumers typically want to pass an address and an
 * arbitrary size and have the whole buffer tagged with the desired value.
 * We provide an optimized generic tag setting function here.
 */

/*!
 * @function mte_store_tag()
 *
 * @brief
 * Sets the tag across a contiguous memory buffer of arbitrary size.
 *
 * @discussion
 * The buffer is tagged with the logical tag embedded in the pointer
 * @c addr. This function handles alignment and size efficiently using a
 * combination of ST2G and DC GVA instructions. It rounds the effective
 * start and end addresses down/up to 16-byte boundaries respectively.
 *
 * @param addr  A pointer containing the desired logical tag.
 *              The address part is used as the starting point for tagging,
 *              rounded down to 16 bytes.
 * @param size  The size of the buffer to tag, in bytes.
 */
static inline void
mte_store_tag(void *__unsafe_indexable addr, size_t size)
{
	uintptr_t end = (uintptr_t)addr + size;

	uintptr_t ptr = ((uintptr_t)addr & -16);  /* round down to 16-byte alignment */
	end = (((uintptr_t)end + 15) & -16);  /* round up to 16-byte alignment */

	/* "Fast path" for small allocations */
	if (end - ptr < 64) {
		mte_store_tag_small(ptr, end);
		return;
	}

#if XNU_KERNEL_PRIVATE
	/*
	 * STGM is a privileged instruction that allows tagging 256 bytes at a time.
	 * We can take advantage of it for large buffers in kernel space. For simplicity
	 * we capture here only the case where the buffer is aligned to at least 256
	 * bytes, so that all page-aligned operations get covered by this function.
	 * Performance will tell us if we need to further expand this to potentially
	 * misaligned buffers.
	 */
	if ((vm_map_address_t)ptr % 256 == 0 && ptr + 256 <= end) {
		/*
		 * STGM is special and gets a tag list as a separate parameter. Forge a
		 * tag list out of the pointer's logical tag (LTag).
		 */
		uint64_t tag_list = vm_memtag_extract_tag((vm_address_t)ptr) * 0x1111111111111111ul;

		while (ptr + 256 <= end) {
			__asm__ __volatile__ ("stgm %0, [%1]" : "+r" (tag_list) : "r" (ptr) : "memory");
			ptr += 256;
		}

		/* If the buffer was 256-byte aligned and a multiple of 256 bytes (common case), we are done. */
		if (ptr == end) {
			return;
		}
		/* For small remainders */
		if (end - ptr < 64) {
			mte_store_tag_small(ptr, end);
			return;
		}
	}
#endif /* XNU_KERNEL_PRIVATE */

	/*
	 * At least 64 bytes need to be tagged.
	 * Tag 64 bytes first so that ptr can be aligned to 64 bytes.
	 */
	__asm__ __volatile__ ("st2g %0, [%0], #32" : "+r" (ptr) : : "memory");
	__asm__ __volatile__ ("st2g %0, [%0], #32" : "+r" (ptr) : : "memory");
	if (ptr == end) {
		return;
	}

	/* Optimize for DC GVA usage */
	ptr = (ptr & -64);  /* round down to 64-byte alignment */
	while (ptr + 64 < end) {
		__asm__ __volatile__ ("dc gva, %0" : : "r" (ptr) : "memory");
		ptr += 64;
	}

	/* Tag the last 64 bytes */
	end -= 64;
	__asm__ __volatile__ ("st2g %0, [%0], #32" : "+r" (end) : : "memory");
	__asm__ __volatile__ ("st2g %0, [%0], #32" : "+r" (end) : : "memory");
}
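
/*
 * Usage sketch (hypothetical caller): apply the logical tag already carried
 * by a pointer to the memory backing a 200-byte allocation.
 *
 *   void *obj = ...;           // pointer whose tag bits hold the desired LTag
 *   mte_store_tag(obj, 200);   // tags [obj, obj + 200), rounded to 16 bytes
 */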

#pragma mark Tag load operations

/*!
 * @function mte_load_tag()
 *
 * @brief
 * Loads the tag associated with the memory address @c addr.
 *
 * @discussion
 * This function wraps the ARM __arm_mte_get_tag intrinsic.
 * The returned pointer has the physical tag associated with @c addr
 * applied to the logical tag bits of the pointer itself.
 *
 * @param addr  The address from which to load the tag.
 * @returns     A pointer with the tag from the memory location applied.
 */
static inline void *
mte_load_tag(void *addr)
{
	addr = __arm_mte_get_tag(addr);
	return addr;
}
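
/*
 * Usage sketch (hypothetical): rebuild a dereferenceable pointer for memory
 * whose current tag is unknown, e.g. when walking untagged bookkeeping
 * addresses.
 *
 *   void *p = mte_load_tag(untagged_addr);
 *   // p now carries the allocation tag stored for that 16-byte granule
 */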

#pragma mark Tag Check Override operations

/*!
 * @function mte_disable_tag_checking()
 *
 * @brief
 * Disables hardware tag checking for the current thread by setting the
 * PSTATE.TCO bit.
 *
 * @discussion
 * Memory accesses performed while tag checking is disabled will not cause
 * tag check faults. This should be used sparingly and only around specific,
 * validated code paths where tag checking is known to be unnecessary and/or
 * performance-prohibitive. Tag checking should be re-enabled as soon as
 * possible.
 */
static inline void
mte_disable_tag_checking()
{
#if DEVELOPMENT || DEBUG
	mte_validate_tco_state();
#endif /* DEVELOPMENT || DEBUG */
	__asm__ __volatile__ ("msr TCO, #1");
}

/*!
 * @function mte_enable_tag_checking()
 *
 * @brief
 * Re-enables hardware tag checking by clearing the PSTATE.TCO bit.
 *
 * @discussion
 * This should be called after a corresponding call to
 * mte_disable_tag_checking().
 */
static inline void
mte_enable_tag_checking()
{
	__asm__ __volatile__ ("msr TCO, #0");
}
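
/*
 * Usage sketch: bracket a known-safe access whose tags may not match, and
 * restore checking immediately afterwards.
 *
 *   mte_disable_tag_checking();
 *   ... accesses that must not raise tag check faults ...
 *   mte_enable_tag_checking();
 */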

#pragma mark Random Tag Generation helpers

/*!
 * @function mte_update_exclude_mask()
 *
 * @brief
 * Updates an exclusion mask based on the tag of the pointer @c src.
 *
 * @discussion
 * This is typically used before generating a random tag to ensure
 * the newly generated tag is different from an existing tag.
 * This function wraps the ARM __arm_mte_exclude_tag intrinsic.
 *
 * @param src           The pointer whose tag should be added to the
 *                      exclusion mask.
 * @param exclude_mask  The current exclusion mask.
 * @returns             The updated exclusion mask including the tag from
 *                      @c src.
 */
static inline mte_exclude_mask_t
mte_update_exclude_mask(void *src, mte_exclude_mask_t exclude_mask)
{
	return __arm_mte_exclude_tag(src, exclude_mask);
}

/*!
 * @function mte_generate_random_tag()
 *
 * @brief
 * Generates a new random tag for @c target_address, excluding tags
 * specified in the @c exclude_mask.
 *
 * @discussion
 * This function wraps the ARM __arm_mte_create_random_tag intrinsic.
 * The returned pointer has the newly generated random tag applied to
 * the logical tag bits of @c target_address.
 *
 * @param target_address    The base address for which to generate a tag.
 * @param exclude_mask      A mask of tags to exclude from the random
 *                          generation.
 * @returns                 A pointer with the newly generated random tag
 *                          applied.
 */
static inline void *
mte_generate_random_tag(void *target_address, mte_exclude_mask_t exclude_mask)
{
	return __arm_mte_create_random_tag(target_address, exclude_mask);
}
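
/*
 * Usage sketch (hypothetical allocator): retag a chunk so its new tag is
 * guaranteed to differ from the old one, then write the tag to memory.
 *
 *   mte_exclude_mask_t mask = mte_update_exclude_mask(old_ptr, 0);
 *   void *new_ptr = mte_generate_random_tag(old_ptr, mask);
 *   mte_store_tag(new_ptr, chunk_size);
 */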

#pragma mark Memory Zeroing helpers

/*!
 * @function mte_bzero_unchecked()
 *
 * @brief
 * Performs a bzero operation on the buffer with hardware tag checking
 * temporarily disabled (PSTATE.TCO=1).
 *
 * @discussion
 * This variant does *not* perform any tag checks on the buffer boundaries.
 *
 * @param buf   The buffer to zero.
 * @param n     The number of bytes to zero.
 */
static inline void
mte_bzero_unchecked(void *__unsafe_indexable buf, size_t n)
{
	mte_disable_tag_checking();
	bzero(__unsafe_forge_bidi_indexable(void *, buf, n), n);
	mte_enable_tag_checking();
}

/*!
 * @function mte_bzero_fast_checked()
 *
 * @brief
 * Performs a bzero operation on the buffer with hardware tag checking
 * temporarily disabled (PSTATE.TCO=1).
 *
 * @discussion
 * In addition, it performs a checked access to the first and last byte of
 * the buffer to ensure those boundaries are valid according to their
 * current tags. This provides a minimal boundary check while still allowing
 * the core bzero operation to run unchecked for performance.
 *
 * @param buf   The buffer to zero.
 * @param n     The number of bytes to zero.
 */
static inline void
mte_bzero_fast_checked(void *__unsafe_indexable buf, size_t n)
{
	/*
	 * Run zeroing operations with tag checking disabled (PSTATE.TCO=1) so as not
	 * to trash the G$ and to maximize pipeline usage. This implies that no checks
	 * are performed on the boundary of the bzero() operation. This is generally
	 * fine because such boundaries are static and derived from the type/entity
	 * that is calling bzero, but notwithstanding this, we touch the first and last
	 * line of the buffer, to ensure that the tagged accesses succeed. This also
	 * has the effect of prefetching the associated G$ line(s), which is/are going
	 * to be used shortly afterwards when tags are set. If the line is in DRAM,
	 * the cost of prefetching will be partially absorbed while the stream of
	 * DC ZVAs is performed.
	 */
	asm volatile ("ldrb wzr, [%0]" : : "r"(buf) : "memory");
	mte_bzero_unchecked(buf, n);
	asm volatile ("ldrb wzr, [%0]" : : "r"((uintptr_t)buf + n - 1) : "memory");
}
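
/*
 * Choosing between the two variants (sketch): prefer the checked form unless
 * the caller has already validated the buffer's tags.
 *
 *   mte_bzero_fast_checked(buf, n);   // faults on a mistagged boundary
 *   mte_bzero_unchecked(buf, n);      // no tag validation at all
 */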

__END_DECLS

#endif /* _ARM64_MTE_H_ */