/*
 * Copyright (c) 2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _ARM64_MTE_H_
#define _ARM64_MTE_H_

#include <sys/types.h>

#if XNU_KERNEL_PRIVATE
#include <vm/vm_memtag.h>
#if DEVELOPMENT || DEBUG
extern void mte_validate_tco_state(void);
#endif /* DEVELOPMENT || DEBUG */
#else /* XNU_KERNEL_PRIVATE */
#include <assert.h>
#include <strings.h>
#define mte_validate_tco_state() do { } while (0)
#endif /* XNU_KERNEL_PRIVATE */

#include <arm_acle.h>

__BEGIN_DECLS

/**
 * The interfaces provided here rely on the MTE ISA being available at compile
 * time, and on MTE being enabled for the executing process at runtime.
 * It is the responsibility of clients of this API to check both conditions
 * to ensure correct behaviour.
 */
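
/*
 * Illustrative sketch (not part of the original header): a client can guard
 * compile-time availability with the ACLE feature-test macro, and must
 * separately verify that MTE is enabled for the process at runtime. The
 * runtime check below is a hypothetical placeholder for whatever mechanism
 * the client has available.
 *
 *     #if defined(__ARM_FEATURE_MEMORY_TAGGING)
 *     if (process_has_mte_enabled()) {   // hypothetical runtime check
 *             void *tagged = mte_generate_random_tag(ptr, 0);
 *             mte_store_tag(tagged, size);
 *     }
 *     #endif
 */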

/**
 * @typedef mte_exclude_mask_t
 *
 * @abstract Represents an MTE tag exclusion mask, used in tag generation.
 */
typedef uint64_t mte_exclude_mask_t;

#define MTE_TAG_SPAN_SIZE (16)
#define MTE_TAGS_PER_SIZE(s) (roundup(s, MTE_TAG_SPAN_SIZE) / MTE_TAG_SPAN_SIZE)
#define MTE_SIZE_TO_ATAG_STORAGE(s) (MTE_TAGS_PER_SIZE(s) / 2)
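
/*
 * Worked example of the macros above: each MTE allocation tag covers a
 * 16-byte granule and occupies 4 bits, so two tags pack into one byte of
 * storage. For a 256-byte buffer:
 *
 *     MTE_TAGS_PER_SIZE(256)        == 16   // sixteen 16-byte granules
 *     MTE_SIZE_TO_ATAG_STORAGE(256) == 8    // 16 tags * 4 bits = 8 bytes
 */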

/*
 * Helpers for MTE intrinsics.
 * Clang provides a basic set of MTE intrinsics that does not cover the
 * whole spectrum of MTE instructions in the ISA. The complete set is
 * provided here, prefixed with mte_.
 */

#pragma mark Tag store operations

/*!
 * @function mte_store_tag_16()
 *
 * @brief
 * Sets the tag for the 16-byte memory span starting at the given address.
 *
 * @discussion
 * This function wraps the ARM __arm_mte_set_tag intrinsic.
 *
 * @param addr The starting address of the 16-byte span to tag.
 */
static inline void
mte_store_tag_16(void *addr)
{
	__arm_mte_set_tag(addr);
}

/*!
 * @function mte_store_tag_32()
 *
 * @brief
 * Sets the tag for the 32-byte memory span starting at the given address.
 *
 * @discussion
 * This function wraps the ARM ST2G instruction.
 *
 * @param addr The starting address of the 32-byte span to tag.
 */
static inline void
mte_store_tag_32(void *addr)
{
	__asm__ __volatile__ ("st2g %0, [%0]" : : "r" (addr) : "memory");
}

/*!
 * @function mte_store_tag_64()
 *
 * @brief
 * Sets the tag for the 64-byte memory span starting at the given address.
 *
 * @discussion
 * This function wraps the ARM DC GVA instruction.
 *
 * @param addr The starting address of the 64-byte span to tag.
 */
static inline void
mte_store_tag_64(void *addr)
{
	__asm__ __volatile__ ("dc gva, %0" : : "r" (addr) : "memory");
}

/*!
 * @function mte_store_tag_small()
 *
 * @brief
 * Sets the tag across a contiguous memory buffer smaller than 64 bytes.
 *
 * @warning
 * This function is used by mte_store_tag(), and clients should refrain from
 * calling it directly. The function explicitly asserts on the preconditions
 * of the size of the buffer.
 *
 * @param start The starting address of the buffer.
 * @param end The end of the buffer.
 */
static inline void
mte_store_tag_small(uintptr_t start, uintptr_t end)
{
	/* Optimize STG/ST2G for sub-64 byte sizes. start and end must be 16-byte aligned. */
	size_t size = end - start;
	assert(size < 64 && size % 16 == 0);

	if (size <= 16) {
		__asm__ __volatile__ ("stg %0, [%0], #16" : "+r" (start) : : "memory");
		return;
	}

	/* At least 32 bytes need to be written */
	__asm__ __volatile__ ("st2g %0, [%0], #32" : "+r" (start) : : "memory");

	/* Tag the last 16 bytes */
	end -= 16;
	__asm__ __volatile__ ("stg %0, [%0], #16" : "+r" (end) : : "memory");
}
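
/*
 * Illustrative trace (not part of the original header): for a 48-byte span,
 * mte_store_tag_small() issues one ST2G covering the first 32 bytes and one
 * STG covering the last 16:
 *
 *     mte_store_tag_small(start, start + 48);
 *     // st2g tags bytes [0, 32), stg tags bytes [32, 48)
 */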

/*
 * When setting tags, consumers commonly want to pass an address and an
 * arbitrary size and have the whole buffer tagged with the desired value.
 * An optimized generic tag-setting function is provided here.
 */

/*!
 * @function mte_store_tag()
 *
 * @brief
 * Sets the tag across a contiguous memory buffer of arbitrary size.
 *
 * @discussion
 * The buffer is tagged with the logical tag embedded in the pointer
 * @c addr. This function handles alignment and size efficiently using a
 * combination of ST2G and DC GVA instructions. It rounds the effective
 * start and end addresses down/up to 16-byte boundaries respectively.
 *
 * @param addr A pointer containing the desired logical tag.
 *             The address part is used as the starting point for tagging,
 *             rounded down to 16 bytes.
 * @param size The size of the buffer to tag, in bytes.
 */
static inline void
mte_store_tag(void *__unsafe_indexable addr, size_t size)
{
	uintptr_t end = (uintptr_t)addr + size;

	uintptr_t ptr = ((uintptr_t)addr & -16);        // round down to 16-byte alignment
	end = (((uintptr_t)end + 15) & -16);            // round up to 16-byte alignment

	/* "Fast path" for small allocations */
	if (end - ptr < 64) {
		mte_store_tag_small(ptr, end);
		return;
	}

#if XNU_KERNEL_PRIVATE
	/*
	 * STGM is a privileged instruction that tags 256 bytes at a time.
	 * We can take advantage of it for large buffers in kernel space. For simplicity
	 * we capture here only the case where the alignment is on at least 256 bytes,
	 * so that all page-aligned operations get covered by this function. Performance
	 * will tell us if we need to further expand this to potentially misaligned
	 * buffers.
	 */
	if ((vm_map_address_t)ptr % 256 == 0 && ptr + 256 <= end) {
		/*
		 * STGM is special and takes a tag list as a separate parameter. Forge a
		 * tag list out of the pointer LTag.
		 */
		uint64_t tag_list = vm_memtag_extract_tag((vm_address_t)ptr) * 0x1111111111111111ul;

		while (ptr + 256 <= end) {
			__asm__ __volatile__ ("stgm %0, [%1]" : "+r" (tag_list) : "r" (ptr) : "memory");
			ptr += 256;
		}

		/* If the buffer was a multiple of 256 bytes (the common case), we are done. */
		if (ptr == end) {
			return;
		}
		/* For a small remainder */
		if (end - ptr < 64) {
			mte_store_tag_small(ptr, end);
			return;
		}
	}
#endif /* XNU_KERNEL_PRIVATE */

	// At least 64 bytes remain to be tagged.
	// Tag 64 bytes up front so that ptr can then be rounded down to a
	// 64-byte boundary without leaving any of the buffer untagged.
	__asm__ __volatile__ ("st2g %0, [%0], #32" : "+r" (ptr) : : "memory");
	__asm__ __volatile__ ("st2g %0, [%0], #32" : "+r" (ptr) : : "memory");
	if (ptr == end) {
		return;
	}

	/* Optimize for DC GVA usage */
	ptr = (ptr & -64);      // round down to 64-byte alignment
	while (ptr + 64 < end) {
		__asm__ __volatile__ ("dc gva, %0" : : "r" (ptr) : "memory");
		ptr += 64;
	}

	// Tag the last 64 bytes
	end -= 64;
	__asm__ __volatile__ ("st2g %0, [%0], #32" : "+r" (end) : : "memory");
	__asm__ __volatile__ ("st2g %0, [%0], #32" : "+r" (end) : : "memory");
}
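
/*
 * Illustrative sketch (not part of the original header): retagging a freshly
 * allocated buffer with a new random tag. `buf` and `size` are assumed to
 * come from the caller's allocator.
 *
 *     void *tagged = mte_generate_random_tag(buf, 0);
 *     mte_store_tag(tagged, size);
 *     // Memory now matches the logical tag in `tagged`; return `tagged`,
 *     // not `buf`, so that subsequent accesses pass the tag check.
 */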

#pragma mark Tag load operations

/*!
 * @function mte_load_tag()
 *
 * @brief
 * Loads the tag associated with the memory address @c addr.
 *
 * @discussion
 * This function wraps the ARM __arm_mte_get_tag intrinsic.
 * The returned pointer has the physical tag associated with @c addr
 * applied to the logical address bits of the pointer itself.
 *
 * @param addr The address from which to load the tag.
 * @returns A pointer with the tag from the memory location applied.
 */
static inline void *
mte_load_tag(void *addr)
{
	addr = __arm_mte_get_tag(addr);
	return addr;
}
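
/*
 * Illustrative sketch (not part of the original header): using
 * mte_load_tag() to verify that a pointer's logical tag matches the
 * allocation tag currently stored in memory.
 *
 *     void *canonical = mte_load_tag(p);
 *     bool tags_match = (canonical == p);
 */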

#pragma mark Tag Check Override operations

/*!
 * @function mte_disable_tag_checking()
 *
 * @brief
 * Disables hardware tag checking for the current thread by setting the
 * PSTATE.TCO bit.
 *
 * @discussion
 * Memory accesses performed while tag checking is disabled will not cause
 * tag check faults. This should be used sparingly and only around specific,
 * validated code paths where tag checking is known to be unnecessary and/or
 * performance-prohibitive. Tag checking should be re-enabled as soon as
 * possible.
 */
static inline void
mte_disable_tag_checking(void)
{
#if DEVELOPMENT || DEBUG
	mte_validate_tco_state();
#endif /* DEVELOPMENT || DEBUG */
	__asm__ __volatile__ ("msr TCO, #1");
}

/*!
 * @function mte_enable_tag_checking()
 *
 * @brief
 * Re-enables hardware tag checking by clearing the PSTATE.TCO bit.
 *
 * @discussion
 * This should be called after a corresponding call to
 * mte_disable_tag_checking().
 */
static inline void
mte_enable_tag_checking(void)
{
	__asm__ __volatile__ ("msr TCO, #0");
}
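
/*
 * Illustrative sketch (not part of the original header): the intended
 * disable/enable pairing around a validated unchecked access.
 *
 *     mte_disable_tag_checking();
 *     // ... accesses that are known-safe but may not match the tags ...
 *     mte_enable_tag_checking();
 */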

#pragma mark Random Tag Generation helpers

/*!
 * @function mte_update_exclude_mask()
 *
 * @brief
 * Updates an exclusion mask based on the tag of the pointer @c src.
 *
 * @discussion
 * This is typically used before generating a random tag to ensure
 * the newly generated tag is different from an existing tag.
 * This function wraps the ARM __arm_mte_exclude_tag intrinsic.
 *
 * @param src The pointer whose tag should be added to the
 *            exclusion mask.
 * @param exclude_mask The current exclusion mask.
 * @returns The updated exclusion mask including the tag from
 *          @c src.
 */
static inline mte_exclude_mask_t
mte_update_exclude_mask(void *src, mte_exclude_mask_t exclude_mask)
{
	return __arm_mte_exclude_tag(src, exclude_mask);
}

/*!
 * @function mte_generate_random_tag()
 *
 * @brief
 * Generates a new random tag for @c target_address, excluding tags
 * specified in the @c exclude_mask.
 *
 * @discussion
 * This function wraps the ARM __arm_mte_create_random_tag intrinsic.
 * The returned pointer has the newly generated random tag applied to
 * the logical tag bits of @c target_address.
 *
 * @param target_address The base address for which to generate a tag.
 * @param exclude_mask A mask of tags to exclude from the random
 *                     generation.
 * @returns A pointer with the newly generated random tag
 *          applied.
 */
static inline void *
mte_generate_random_tag(void *target_address, mte_exclude_mask_t exclude_mask)
{
	return __arm_mte_create_random_tag(target_address, exclude_mask);
}
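
/*
 * Illustrative sketch (not part of the original header): generating a new
 * tag for a reallocated chunk while guaranteeing it differs from the
 * chunk's previous tag. `old_ptr` and `chunk_size` are assumed to come
 * from the caller's allocator.
 *
 *     mte_exclude_mask_t mask = mte_update_exclude_mask(old_ptr, 0);
 *     void *new_ptr = mte_generate_random_tag(old_ptr, mask);
 *     mte_store_tag(new_ptr, chunk_size);
 */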

#pragma mark Memory Zeroing helpers

/*!
 * @function mte_bzero_unchecked()
 *
 * @brief
 * Performs a bzero operation on the buffer with hardware tag checking
 * temporarily disabled (PSTATE.TCO=1).
 *
 * @discussion
 * This variant does *not* perform any tag checks on the buffer boundaries.
 *
 * @param buf The buffer to zero.
 * @param n The number of bytes to zero.
 */
static inline void
mte_bzero_unchecked(void *__unsafe_indexable buf, size_t n)
{
	mte_disable_tag_checking();
	bzero(__unsafe_forge_bidi_indexable(void *, buf, n), n);
	mte_enable_tag_checking();
}

/*!
 * @function mte_bzero_fast_checked()
 *
 * @brief
 * Performs a bzero operation on the buffer with hardware tag checking
 * temporarily disabled (PSTATE.TCO=1).
 *
 * @discussion
 * It performs a checked access to the first byte of the buffer before
 * disabling checking, and to the last byte after re-enabling it, ensuring
 * both boundaries are valid according to their current tags. This provides
 * a minimal boundary check while still allowing the core bzero operation
 * to run unchecked for performance.
 *
 * @param buf The buffer to zero.
 * @param n The number of bytes to zero.
 */
static inline void
mte_bzero_fast_checked(void *__unsafe_indexable buf, size_t n)
{
	/*
	 * Run zeroing operations with tag checking disabled (PSTATE.TCO=1) to not
	 * trash the G$ and to maximize pipeline usage. This implies that no checks
	 * are performed on the boundary of the bzero() operation. This is generally
	 * fine because such boundaries are static and derived from the type/entity
	 * that is calling bzero(), but notwithstanding this, we touch the first and
	 * last line of the buffer to ensure that the tagged accesses succeed. This
	 * also has the effect of prefetching the associated G$ line(s), which are
	 * going to be used shortly after to set tags. If the line is in DRAM, the
	 * cost of prefetching will be partially absorbed while the stream of
	 * DC ZVAs is performed.
	 */
	asm volatile ("ldrb wzr, [%0]" : : "r"(buf) : "memory");
	mte_bzero_unchecked(buf, n);
	asm volatile ("ldrb wzr, [%0]" : : "r"((uintptr_t)buf + n - 1) : "memory");
}
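
/*
 * Illustrative note (not part of the original header): the two variants
 * trade boundary validation for speed.
 *
 *     mte_bzero_fast_checked(buf, n);   // checks first- and last-byte tags
 *     mte_bzero_unchecked(buf, n);      // no tag checks at all
 *
 * A reasonable rule of thumb is to prefer the fast-checked variant when the
 * pointer/size pair comes from a caller, since it validates both boundary
 * tags before and after the zeroing.
 */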

__END_DECLS

#endif /* _ARM64_MTE_H_ */