xref: /xnu-11215.81.4/osfmk/arm64/tlb.h (revision d4514f0bc1d3f944c22d92e68b646ac3fb40d452)
1 /*
2  * Copyright (c) 2019-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #pragma once
30 
31 #include <arm64/proc_reg.h>
32 #include <machine/atomic.h>
33 
34 #define tlbi_addr(x) ((((x) >> 12) & TLBI_ADDR_MASK) << TLBI_ADDR_SHIFT)
35 #define tlbi_asid(x) (((uintptr_t)(x) & TLBI_ASID_MASK) << TLBI_ASID_SHIFT)
36 
37 #if __ARM_KERNEL_PROTECT__
38 /*
39  * __ARM_KERNEL_PROTECT__ adds two complications to TLB management:
40  *
41  * 1. As each pmap has two ASIDs, every TLB operation that targets an ASID must
42  *   target both ASIDs for the pmap that owns the target ASID.
43  *
44  * 2. Any TLB operation targeting the kernel_pmap ASID (ASID 0) must target all
45  *   ASIDs (as kernel_pmap mappings may be referenced while using an ASID that
46  *   belongs to another pmap).  We expect these routines to be called with the
47  *   EL0 ASID for the target; not the EL1 ASID.
48  */
49 #endif /* __ARM_KERNEL_PROTECT__ */
50 
static inline void
sync_tlb_flush(void)
{
	/*
	 * Complete all prior TLB maintenance operations for the Inner
	 * Shareable domain (all cores), then resynchronize the local
	 * instruction stream so later instructions observe the invalidation.
	 */
#if HAS_FEAT_XS
	/*
	 * FEAT_XS: the nXS barrier form is not required to wait for accesses
	 * with the XS attribute; it pairs with the *nxs TLBI variants issued
	 * by the flush routines in this file.
	 */
	asm volatile ("dsb ishnxs":::"memory");
#else
	__builtin_arm_dsb(DSB_ISH);
#endif /* HAS_FEAT_XS */
	__builtin_arm_isb(ISB_SY);
}
61 
static inline void
sync_tlb_flush_local(void)
{
	/*
	 * Complete prior TLB maintenance for the local core only
	 * (Non-shareable domain), then resynchronize the local context.
	 * Pair with the non-broadcast (non-"is") TLBI variants.
	 */
#if HAS_FEAT_XS
	/* FEAT_XS: relaxed nXS barrier; pairs with the *nxs TLBI variants. */
	asm volatile ("dsb nshnxs":::"memory");
#else
	__builtin_arm_dsb(DSB_NSH);
#endif /* HAS_FEAT_XS */
	__builtin_arm_isb(ISB_SY);
}
72 
#if   HAS_FEAT_XS

/*
 * "Strong" synchronization: a full (non-nXS) DSB that completes all TLB
 * maintenance, including operations that must wait for XS-attributed
 * accesses.  Used after non-nXS TLBIs when FEAT_XS is implemented.
 */
static inline void
sync_tlb_flush_strong(void)
{
	__builtin_arm_dsb(DSB_ISH);
	__builtin_arm_isb(ISB_SY);
}

#endif /* HAS_FEAT_XS */
83 
84 
85 static inline void
arm64_sync_tlb(bool strong __unused)86 arm64_sync_tlb(bool strong __unused)
87 {
88 	sync_tlb_flush();
89 }
90 
// flush_mmu_tlb: full TLB flush on all cores
static inline void
flush_mmu_tlb_async(void)
{
	/*
	 * tlbi vmalle1is: invalidate all stage-1 EL1 TLB entries, all ASIDs,
	 * broadcast to the Inner Shareable domain.  Asynchronous: the caller
	 * must follow with a completion barrier (see flush_mmu_tlb()).
	 */
	asm volatile ("tlbi vmalle1is");
}
97 
/* Synchronous full TLB flush on all cores: invalidate, then wait for completion. */
static inline void
flush_mmu_tlb(void)
{
	flush_mmu_tlb_async();
#if HAS_FEAT_XS
	/* Full flush is always treated as "strong" when there is a HW-level distinction. */
	sync_tlb_flush_strong();
#else
	sync_tlb_flush();
#endif /* HAS_FEAT_XS */
}
109 
// flush_core_tlb: full TLB flush on local core only
static inline void
flush_core_tlb_async(void)
{
	/*
	 * tlbi vmalle1: invalidate all stage-1 EL1 TLB entries on the local
	 * core only (no broadcast).  Asynchronous: pair with
	 * sync_tlb_flush_local().
	 */
#if HAS_FEAT_XS
	asm volatile ("tlbi vmalle1nxs");
#else
	asm volatile ("tlbi vmalle1");
#endif /* HAS_FEAT_XS */
}
120 
/* Synchronous local full flush: invalidate on this core, then wait locally. */
static inline void
flush_core_tlb(void)
{
	flush_core_tlb_async();
	sync_tlb_flush_local();
}
127 
// flush_mmu_tlb_allentries_async: flush entries that map VA range, all ASIDS, all cores
// start and end are in units of 4K pages.
// Asynchronous: pair with arm64_sync_tlb() (see flush_mmu_tlb_allentries()).
static inline void
flush_mmu_tlb_allentries_async(uint64_t start, uint64_t end, uint64_t pmap_page_size,
    bool last_level_only, bool strong __unused)
{
#if __ARM_16K_PG__
	if (pmap_page_size == 16384) {
		/* Round the 4K-page-number range out to 16K page boundaries. */
		start = start & ~0x3ULL;

		/*
		 * The code below is not necessarily correct.  From an overview of
		 * the client code, the expected contract for TLB flushes is that
		 * we will expand from an "address, length" pair to "start address,
		 * end address" in the course of a TLB flush.  This suggests that
		 * a flush for "X, X+4" is actually only asking for a flush of a
		 * single 16KB page.  At the same time, we'd like to be prepared
		 * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page
		 * number to a 16KB page boundary.  This should deal correctly with
		 * unaligned inputs.
		 *
		 * If our expectations about client behavior are wrong however, this
		 * will lead to occasional TLB corruption on platforms with 16KB
		 * pages.
		 */
		end = (end + 0x3ULL) & ~0x3ULL;
	}
#endif // __ARM_16K_PG__
	if (last_level_only) {
		/* vaale1is: invalidate last-level entries for VA, all ASIDs, broadcast. */
		for (; start < end; start += (pmap_page_size / 4096)) {
#if HAS_FEAT_XS
			/* Prefer the relaxed nXS form unless the caller asked for a strong flush. */
			if (__probable(!strong)) {
				asm volatile ("tlbi vaale1isnxs, %0" : : "r"(start));
			} else
#endif /* HAS_FEAT_XS */
			{
				asm volatile ("tlbi vaale1is, %0" : : "r"(start));
			}
		}
	} else {
		/* vaae1is: invalidate entries at all translation levels for VA, all ASIDs. */
		for (; start < end; start += (pmap_page_size / 4096)) {
#if HAS_FEAT_XS
			if (__probable(!strong)) {
				asm volatile ("tlbi vaae1isnxs, %0" : : "r"(start));
			} else
#endif /* HAS_FEAT_XS */
			{
				asm volatile ("tlbi vaae1is, %0" : : "r"(start));
			}
		}
	}
}
180 
/* Synchronous wrapper: flush the VA range (all ASIDs, all cores), then wait. */
static inline void
flush_mmu_tlb_allentries(uint64_t start, uint64_t end, uint64_t pmap_page_size, bool last_level_only, bool strong)
{
	flush_mmu_tlb_allentries_async(start, end, pmap_page_size, last_level_only, strong);
	arm64_sync_tlb(strong);
}
187 
// flush_mmu_tlb_entries: flush TLB entries that map a VA range and ASID, all cores
// start and end must have the ASID in the high 16 bits, with the VA in units of 4K in the lowest bits
// Will also flush global entries that match the VA range
// Asynchronous: pair with arm64_sync_tlb() (see flush_mmu_tlb_entries()).
static inline void
flush_mmu_tlb_entries_async(uint64_t start, uint64_t end, uint64_t pmap_page_size,
    bool last_level_only, bool strong __unused)
{
#if __ARM_16K_PG__
	if (pmap_page_size == 16384) {
		/* Round the 4K-page-number range out to 16K page boundaries. */
		start = start & ~0x3ULL;

		/*
		 * The code below is not necessarily correct.  From an overview of
		 * the client code, the expected contract for TLB flushes is that
		 * we will expand from an "address, length" pair to "start address,
		 * end address" in the course of a TLB flush.  This suggests that
		 * a flush for "X, X+4" is actually only asking for a flush of a
		 * single 16KB page.  At the same time, we'd like to be prepared
		 * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page
		 * number to a 16KB page boundary.  This should deal correctly with
		 * unaligned inputs.
		 *
		 * If our expectations about client behavior are wrong however, this
		 * will lead to occasional TLB corruption on platforms with 16KB
		 * pages.
		 */
		end = (end + 0x3ULL) & ~0x3ULL;
	}
#endif // __ARM_16K_PG__
#if __ARM_KERNEL_PROTECT__
	uint64_t asid = start >> TLBI_ASID_SHIFT;
	/*
	 * If we are flushing ASID 0, this is a kernel operation.  With this
	 * ASID scheme, this means we should flush all ASIDs.
	 */
	if (asid == 0) {
		if (last_level_only) {
			for (; start < end; start += (pmap_page_size / 4096)) {
				asm volatile ("tlbi vaale1is, %0" : : "r"(start));
			}
		} else {
			for (; start < end; start += (pmap_page_size / 4096)) {
				asm volatile ("tlbi vaae1is, %0" : : "r"(start));
			}
		}
		return;
	}
	/*
	 * Each pmap owns two ASIDs (EL0 and EL1 flavors, differing in the low
	 * ASID bit); every VA must be flushed under both.  The loops below
	 * toggle the low ASID bit to issue each TLBI twice per page.
	 */
	start = start | (1ULL << TLBI_ASID_SHIFT);
	end = end | (1ULL << TLBI_ASID_SHIFT);
	if (last_level_only) {
		for (; start < end; start += (pmap_page_size / 4096)) {
			/* Flush with the low ASID bit clear... */
			start = start & ~(1ULL << TLBI_ASID_SHIFT);
			asm volatile ("tlbi vale1is, %0" : : "r"(start));
			/* ...and again with it set. */
			start = start | (1ULL << TLBI_ASID_SHIFT);
			asm volatile ("tlbi vale1is, %0" : : "r"(start));
		}
	} else {
		for (; start < end; start += (pmap_page_size / 4096)) {
			start = start & ~(1ULL << TLBI_ASID_SHIFT);
			asm volatile ("tlbi vae1is, %0" : : "r"(start));
			start = start | (1ULL << TLBI_ASID_SHIFT);
			asm volatile ("tlbi vae1is, %0" : : "r"(start));
		}
	}
#else
	if (last_level_only) {
		/* vale1is: invalidate last-level entry for VA+ASID, broadcast. */
		for (; start < end; start += (pmap_page_size / 4096)) {
#if HAS_FEAT_XS
			/* Prefer the relaxed nXS form unless the caller asked for a strong flush. */
			if (__probable(!strong)) {
				asm volatile ("tlbi vale1isnxs, %0" : : "r"(start));
			} else
#endif /* HAS_FEAT_XS */
			{
				asm volatile ("tlbi vale1is, %0" : : "r"(start));
			}
		}
	} else {
		/* vae1is: invalidate entries at all levels for VA+ASID, broadcast. */
		for (; start < end; start += (pmap_page_size / 4096)) {
#if HAS_FEAT_XS
			if (__probable(!strong)) {
				asm volatile ("tlbi vae1isnxs, %0" : : "r"(start));
			} else
#endif /* HAS_FEAT_XS */
			{
				asm volatile ("tlbi vae1is, %0" : : "r"(start));
			}
		}
	}
#endif /* __ARM_KERNEL_PROTECT__ */
}
278 
/* Synchronous wrapper: flush the VA+ASID range on all cores, then wait. */
static inline void
flush_mmu_tlb_entries(uint64_t start, uint64_t end, uint64_t pmap_page_size, bool last_level_only, bool strong)
{
	flush_mmu_tlb_entries_async(start, end, pmap_page_size, last_level_only, strong);
	arm64_sync_tlb(strong);
}
285 
// flush_mmu_tlb_asid: flush all entries that match an ASID, on all cores
// ASID must be in high 16 bits of argument
// Will not flush global entries
// Asynchronous: pair with arm64_sync_tlb() (see flush_mmu_tlb_asid()).
static inline void
flush_mmu_tlb_asid_async(uint64_t val, bool strong __unused)
{
#if __ARM_KERNEL_PROTECT__
	/*
	 * If we are flushing ASID 0, this is a kernel operation.  With this
	 * ASID scheme, this means we should flush all ASIDs.
	 */
	uint64_t asid = val >> TLBI_ASID_SHIFT;
	if (asid == 0) {
		asm volatile ("tlbi vmalle1is");
		return;
	}
	/* Flush the EL0 ASID (low ASID bit clear) here... */
	val = val & ~(1ULL << TLBI_ASID_SHIFT);
	asm volatile ("tlbi aside1is, %0" : : "r"(val));
	/* ...then fall through to flush the paired EL1 ASID (low bit set). */
	val = val | (1ULL << TLBI_ASID_SHIFT);
#endif /* __ARM_KERNEL_PROTECT__ */
#if HAS_FEAT_XS
	/* Prefer the relaxed nXS form unless the caller asked for a strong flush. */
	if (__probable(!strong)) {
		asm volatile ("tlbi aside1isnxs, %0" : : "r"(val));
	} else
#endif /* HAS_FEAT_XS */
	{
		asm volatile ("tlbi aside1is, %0" : : "r"(val));
	}
}
315 
/* Synchronous wrapper: flush the ASID on all cores, then wait for completion. */
static inline void
flush_mmu_tlb_asid(uint64_t val, bool strong)
{
	flush_mmu_tlb_asid_async(val, strong);
	arm64_sync_tlb(strong);
}
322 
// flush_core_tlb_asid: flush all entries that match an ASID, local core only
// ASID must be in high 16 bits of argument
// Will not flush global entries
// Asynchronous: pair with sync_tlb_flush_local() (see flush_core_tlb_asid()).
static inline void
flush_core_tlb_asid_async(uint64_t val)
{
#if __ARM_KERNEL_PROTECT__
	/*
	 * If we are flushing ASID 0, this is a kernel operation.  With this
	 * ASID scheme, this means we should flush all ASIDs.
	 */
	uint64_t asid = val >> TLBI_ASID_SHIFT;
	if (asid == 0) {
		asm volatile ("tlbi vmalle1");
		return;
	}
	/* Flush the EL0 ASID (low ASID bit clear) here... */
	val = val & ~(1ULL << TLBI_ASID_SHIFT);
	asm volatile ("tlbi aside1, %0" : : "r"(val));
	/* ...then fall through to flush the paired EL1 ASID (low bit set). */
	val = val | (1ULL << TLBI_ASID_SHIFT);
#endif /* __ARM_KERNEL_PROTECT__ */
#if HAS_FEAT_XS
	asm volatile ("tlbi aside1nxs, %0" : : "r"(val));
#else
	asm volatile ("tlbi aside1, %0" : : "r"(val));
#endif /* HAS_FEAT_XS */
}
349 
/* Synchronous wrapper: flush the ASID on the local core, then wait locally. */
static inline void
flush_core_tlb_asid(uint64_t val)
{
	flush_core_tlb_asid_async(val);
	sync_tlb_flush_local();
}
356 
357 #if __ARM_RANGE_TLBI__
358 #if __ARM_KERNEL_PROTECT__
359 	#error __ARM_RANGE_TLBI__ + __ARM_KERNEL_PROTECT__ is not currently supported
360 #endif
361 
362 #define ARM64_TLB_RANGE_MIN_PAGES 2
363 #define ARM64_TLB_RANGE_MAX_PAGES (1ULL << 21)
364 #define rtlbi_addr(x, shift) (((x) >> (shift)) & RTLBI_ADDR_MASK)
365 #define rtlbi_scale(x) ((uint64_t)(x) << RTLBI_SCALE_SHIFT)
366 #define rtlbi_num(x) ((uint64_t)(x) << RTLBI_NUM_SHIFT)
367 
368 /**
369  * Given the number of pages to invalidate, generate the correct parameter to
370  * pass to any of the TLBI by range methods.
371  */
372 static inline uint64_t
generate_rtlbi_param(ppnum_t npages,uint32_t asid,vm_offset_t va,uint64_t pmap_page_shift)373 generate_rtlbi_param(ppnum_t npages, uint32_t asid, vm_offset_t va, uint64_t pmap_page_shift)
374 {
375 	assert(npages > 1);
376 	/**
377 	 * Per the armv8.4 RTLBI extension spec, the range encoded in the rtlbi register operand is defined by:
378 	 * BaseADDR <= VA < BaseADDR+((NUM+1)*2^(5*SCALE+1) * Translation_Granule_Size)
379 	 */
380 	unsigned order = (unsigned)(sizeof(npages) * 8) - (unsigned)__builtin_clz(npages - 1) - 1;
381 	unsigned scale = ((order ? order : 1) - 1) / 5;
382 	unsigned granule = 1 << ((5 * scale) + 1);
383 	unsigned num = (((npages + granule - 1) & ~(granule - 1)) / granule) - 1;
384 	return tlbi_asid(asid) | RTLBI_TG(pmap_page_shift) | rtlbi_scale(scale) | rtlbi_num(num) | rtlbi_addr(va, pmap_page_shift);
385 }
386 
// flush_mmu_tlb_range: flush TLB entries that map a VA range using a single instruction
// The argument should be encoded according to generate_rtlbi_param().
// Follows the same ASID matching behavior as flush_mmu_tlb_entries()
// Asynchronous: pair with arm64_sync_tlb() (see flush_mmu_tlb_range()).
static inline void
flush_mmu_tlb_range_async(uint64_t val, bool last_level_only, bool strong __unused)
{
	if (last_level_only) {
		/* rvale1is: range-invalidate last-level entries for VA+ASID, broadcast. */
#if HAS_FEAT_XS
		/* Prefer the relaxed nXS form unless the caller asked for a strong flush. */
		if (__probable(!strong)) {
			asm volatile ("tlbi rvale1isnxs, %0" : : "r"(val));
		} else
#endif /* HAS_FEAT_XS */
		{
			asm volatile ("tlbi rvale1is, %0" : : "r"(val));
		}
	} else {
		/* rvae1is: range-invalidate entries at all levels for VA+ASID, broadcast. */
#if HAS_FEAT_XS
		if (__probable(!strong)) {
			asm volatile ("tlbi rvae1isnxs, %0" : : "r"(val));
		} else
#endif /* HAS_FEAT_XS */
		{
			asm volatile ("tlbi rvae1is, %0" : : "r"(val));
		}
	}
}
413 
/* Synchronous wrapper: range-flush (ASID-matched) on all cores, then wait. */
static inline void
flush_mmu_tlb_range(uint64_t val, bool last_level_only, bool strong)
{
	flush_mmu_tlb_range_async(val, last_level_only, strong);
	arm64_sync_tlb(strong);
}
420 
// flush_mmu_tlb_allrange: flush TLB entries that map a VA range using a single instruction
// The argument should be encoded according to generate_rtlbi_param().
// Follows the same ASID matching behavior as flush_mmu_tlb_allentries()
// Asynchronous: pair with arm64_sync_tlb() (see flush_mmu_tlb_allrange()).
static inline void
flush_mmu_tlb_allrange_async(uint64_t val, bool last_level_only, bool strong __unused)
{
	if (last_level_only) {
		/* rvaale1is: range-invalidate last-level entries, all ASIDs, broadcast. */
#if HAS_FEAT_XS
		/* Prefer the relaxed nXS form unless the caller asked for a strong flush. */
		if (__probable(!strong)) {
			asm volatile ("tlbi rvaale1isnxs, %0" : : "r"(val));
		} else
#endif /* HAS_FEAT_XS */
		{
			asm volatile ("tlbi rvaale1is, %0" : : "r"(val));
		}
	} else {
		/* rvaae1is: range-invalidate entries at all levels, all ASIDs, broadcast. */
#if HAS_FEAT_XS
		if (__probable(!strong)) {
			asm volatile ("tlbi rvaae1isnxs, %0" : : "r"(val));
		} else
#endif /* HAS_FEAT_XS */
		{
			asm volatile ("tlbi rvaae1is, %0" : : "r"(val));
		}
	}
}
447 
/* Synchronous wrapper: range-flush (all ASIDs) on all cores, then wait. */
static inline void
flush_mmu_tlb_allrange(uint64_t val, bool last_level_only, bool strong)
{
	flush_mmu_tlb_allrange_async(val, last_level_only, strong);
	arm64_sync_tlb(strong);
}
454 
// flush_core_tlb_allrange: flush TLB entries that map a VA range using a single instruction, local core only
// The argument should be encoded according to generate_rtlbi_param().
// Follows the same ASID matching behavior as flush_mmu_tlb_allentries()
// Asynchronous: pair with sync_tlb_flush_local() (see flush_core_tlb_allrange()).
static inline void
flush_core_tlb_allrange_async(uint64_t val)
{
	/* rvaae1: range-invalidate all levels, all ASIDs, local core only. */
#if HAS_FEAT_XS
	asm volatile ("tlbi rvaae1nxs, %0" : : "r"(val));
#else
	asm volatile ("tlbi rvaae1, %0" : : "r"(val));
#endif /* HAS_FEAT_XS */
}
467 
/* Synchronous wrapper: local range-flush (all ASIDs), then wait locally. */
static inline void
flush_core_tlb_allrange(uint64_t val)
{
	flush_core_tlb_allrange_async(val);
	sync_tlb_flush_local();
}
474 
475 #endif // __ARM_RANGE_TLBI__
476 
477