/*
 * Copyright (c) 2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <machine/asm.h>

/* This section has all the code necessary for the atomic operations supported by
 * OSAtomicFifoEnqueue, OSAtomicFifoDequeue APIs in libplatform.
 *
 * This code needs to be compiled as 1 section and should not make branches
 * outside of this section. This allows us to copy the entire section to the
 * text comm page once it is created - see osfmk/arm/commpage/commpage.c
 *
 * This section is split into 2 parts - the preemption-free zone (PFZ) routines
 * and the preemptible routines (non-PFZ). The PFZ routines will not be
 * preempted by the scheduler if the pc of the userspace process is in that
 * region while handling asynchronous interrupts (note that traps are still
 * possible in the PFZ). Instead, the scheduler will mark x15 (known through
 * coordination with the functions in the commpage section) to indicate to the
 * userspace code that it needs to take a delayed preemption. The PFZ functions
 * may make callouts to preemptible routines and vice-versa. When a function
 * returns to a preemptible routine after a callout to a function in the PFZ, it
 * needs to check x15 to determine if a delayed preemption needs to be taken. In
 * addition, functions in the PFZ should not have backwards branches.
 *
 * The entry point to execute code in the commpage text section is through the
 * jump table at the very top of the section. The base of the jump table is
 * exposed to userspace via the APPLE array and the offsets from the base of the
 * jump table are listed in the arm/cpu_capabilities.h header. Adding any new
 * functions in the PFZ requires a lockstep change to the cpu_capabilities.h
 * header.
 *
 * Functions in PFZ:
 *		Enqueue function
 *		Dequeue function
 *
 * Functions not in PFZ:
 *		Backoff function as part of spin loop
 *		Preempt function to take delayed preemption as indicated by kernel
 *
 * ----------------------------------------------------------------------
 *
 * The high level goal of the asm code in this section is to enqueue and dequeue
 * from a FIFO linked list.
 *
 * typedef volatile struct {
 *		void *opaque1; <-- ptr to first queue element or null
 *		void *opaque2; <-- ptr to last queue element or null
 *		int opaque3; <-- spinlock
 * } OSFifoQueueHead;
 *
 * This is done through a userspace spin lock stored in the linked list head
 * for synchronization.
 *
 * Here is the pseudocode for the spin lock acquire algorithm which is split
 * between the PFZ and the non-PFZ areas of the commpage text section. The
 * pseudocode here is just for the enqueue operation but it is symmetrical for
 * the dequeue operation.
 *
 * // Not in the PFZ. Entry from jump table.
 * ENQUEUE()
 *		enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *		// We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *		// the PFZ so we need to check if we need to take a delayed
 *		// preemption.
 *		if (kernel_wants_to_preempt_us) {
 *			// This is done through the pfz_exit() mach trap which is a dummy
 *			// syscall whose sole purpose is to allow the thread to enter the
 *			// kernel so that it can be preempted at AST.
 *			enter_kernel_to_take_delayed_preemption()
 *		}
 *
 *		if (!enqueued) {
 *			ARM_MONITOR;
 *			WFE;
 *			enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *			if (!enqueued) {
 *				// We failed twice, take a backoff
 *				BACKOFF();
 *				goto ENQUEUE()
 *			} else {
 *				// We got here from PFZ, check for delayed preemption
 *				if (kernel_wants_to_preempt_us) {
 *					enter_kernel_to_take_delayed_preemption()
 *				}
 *			}
 *		}
 *
 * // in PFZ
 * TRY_LOCK_AND_ENQUEUE():
 *		is_locked = try_lock(lock_addr);
 *		if (is_locked) {
 *			<do enqueue operation>
 *			return true
 *		} else {
 *			return false
 *		}
 *
 *
 * // Not in the PFZ
 * BACKOFF():
 *		// We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *		// the PFZ so we need to check if we need to take a delayed
 *		// preemption.
 *		if (kernel_wants_to_preempt_us) {
 *			enter_kernel_to_take_preemption()
 *		} else {
 *			// Note that it is safe to do this loop here since the entire
 *			// BACKOFF function isn't in the PFZ and so can be preempted at any
 *			// time
 *			do {
 *				lock_is_free = peek(lock_addr);
 *				if (lock_is_free) {
 *					return
 *				} else {
 *					pause_with_monitor(lock_addr)
 *				}
 *			} while (1)
 *		}
 */
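
/* For reference, a minimal userspace usage sketch of the libplatform API that
 * ultimately lands in this section. Illustrative only: it assumes the
 * OSAtomicFifoEnqueue/OSAtomicFifoDequeue declarations and the
 * OS_ATOMIC_FIFO_QUEUE_INIT initializer from <libkern/OSAtomic.h>; the element
 * type and its field names below are hypothetical.
 *
 *	#include <libkern/OSAtomic.h>
 *	#include <stddef.h>
 *
 *	struct elem {
 *		long data;
 *		struct elem *link;	// the "link field" whose offset is passed in
 *	};
 *
 *	static OSFifoQueueHead head = OS_ATOMIC_FIFO_QUEUE_INIT;
 *
 *	void
 *	example(struct elem *e)
 *	{
 *		OSAtomicFifoEnqueue(&head, e, offsetof(struct elem, link));
 *		struct elem *out = OSAtomicFifoDequeue(&head, offsetof(struct elem, link));
 *		(void)out;	// out == e here since the queue held a single element
 *	}
 */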

/* Macros and helpers */

.macro BACKOFF lock_addr
	// Save registers we can't clobber
	stp		x0, x1, [sp, #-16]!
	stp		x2, x9, [sp, #-16]!

	// Pass in lock addr to backoff function
	mov		x0, \lock_addr
	bl		_backoff			// Jump out of the PFZ zone now

	// Restore registers
	ldp		x2, x9, [sp], #16
	ldp		x0, x1, [sp], #16
.endmacro

/* x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 * x3 = Address of lock
 *
 * Moves result of the helper function to the register specified
 */
.macro TRYLOCK_ENQUEUE result
	stp		x0, xzr, [sp, #-16]! // Save x0 since it'll be clobbered by return value

	bl	_pfz_trylock_and_enqueue
	mov		\result, x0

	ldp		x0, xzr, [sp], #16 // Restore saved registers
.endmacro

/* x0 = pointer to queue head
 * x1 = offset of link field inside element
 * x2 = Address of lock
 *
 * Moves result of the helper function to the register specified
 */
.macro TRYLOCK_DEQUEUE result
	stp		x0, xzr, [sp, #-16]! // Save x0 since it'll be clobbered by return value

	bl	_pfz_trylock_and_dequeue
	mov		\result, x0

	ldp		x0, xzr, [sp], #16 // Restore saved registers
.endmacro

/*
 * Takes a delayed preemption if needed and then branches to the label
 * specified.
 *
 * Modifies x15
 */
.macro PREEMPT_SELF_THEN branch_to_take_on_success
	cbz		x15, \branch_to_take_on_success // No delayed preemption to take, just try again

	mov		x15, xzr				// zero out the preemption pending field
	bl		_preempt_self
	b		\branch_to_take_on_success
.endmacro
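
/* In C-like pseudocode, PREEMPT_SELF_THEN amounts to the delayed-preemption
 * check described in the header comment (sketch only; x15 is the register the
 * kernel marks while the thread is in the PFZ):
 *
 *	if (x15 != 0) {		// kernel asked for a delayed preemption
 *		x15 = 0;
 *		preempt_self();	// enter the kernel via the pfz_exit mach trap
 *	}
 *	goto branch_to_take_on_success;
 */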

	.section __TEXT_EXEC,__commpage_text,regular,pure_instructions

	/* Preemption free functions */
	.align 2
_jump_table:				// 32 entry jump table, only 2 are used
	b	_pfz_enqueue
	b	_pfz_dequeue
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
	brk #666
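
/* Illustrative only: how a userspace runtime might reach these entries. The
 * commpage_text_base pointer below is hypothetical (in practice the base is
 * published via the APPLE array and the per-entry offsets come from
 * arm/cpu_capabilities.h); each jump table slot is a single 4-byte branch, so
 * slot 0 is the enqueue entry and slot 1 is the dequeue entry.
 *
 *	typedef void  (*pfz_enqueue_fn)(OSFifoQueueHead *, void *, size_t);
 *	typedef void *(*pfz_dequeue_fn)(OSFifoQueueHead *, size_t);
 *
 *	static inline void
 *	commpage_fifo_enqueue(uint8_t *commpage_text_base, OSFifoQueueHead *head,
 *	    void *elem, size_t link_offset)
 *	{
 *		((pfz_enqueue_fn)(commpage_text_base + 0))(head, elem, link_offset);
 *	}
 *
 *	static inline void *
 *	commpage_fifo_dequeue(uint8_t *commpage_text_base, OSFifoQueueHead *head,
 *	    size_t link_offset)
 *	{
 *		return ((pfz_dequeue_fn)(commpage_text_base + 4))(head, link_offset);
 *	}
 */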


/*
 * typedef volatile struct {
 *		void *opaque1; <-- ptr to first queue element or null
 *		void *opaque2; <-- ptr to last queue element or null
 *		int opaque3; <-- spinlock
 * } OSFifoQueueHead;
 */

/* Non-preemptible helper routine to FIFO enqueue:
 * int pfz_trylock_and_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = pointer to new element to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Only caller save registers (x9 - x15) are used in this function
 *
 * Returns 0 on success and non-zero value on failure
 */
	.globl _pfz_trylock_and_enqueue
	.align 2
_pfz_trylock_and_enqueue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	mov		w10, wzr		 // unlock value = w10 = 0
	mov		w11, #1			 // locked value = w11 = 1

	// Try to grab the lock
	casa	w10, w11, [x3]	 // Atomic CAS with acquire barrier
	cbz		w10, Ltrylock_enqueue_success

	mov		x0, #-1			// Failed
	b		Ltrylock_enqueue_exit

	/* We got the lock, enqueue the element */

Ltrylock_enqueue_success:
	ldr		x10, [x0, #8]	 // x10 = tail of the queue
	cbnz	x10, Lnon_empty_queue // tail not NULL
	str		x1, [x0]		 // Set head to new element
	b		Lset_new_tail

Lnon_empty_queue:
	str		x1, [x10, x2]	// Set old tail -> offset = new elem

Lset_new_tail:
	str		x1, [x0, #8]		// Set tail = new elem

	// Drop spin lock with release barrier (pairs with acquire in casa)
	stlr	wzr, [x3]

	mov		x0, xzr				// Mark success

Ltrylock_enqueue_exit:
	POP_FRAME
	ARM64_STACK_EPILOG
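
/* For clarity, an approximate C rendering of the enqueue helper above, using
 * C11 atomics in place of casa/stlr (sketch only, not part of the commpage):
 *
 *	int
 *	pfz_trylock_and_enqueue(OSFifoQueueHead *list, void *new, size_t offset,
 *	    uint32_t *lock_addr)
 *	{
 *		uint32_t unlocked = 0;
 *		if (!atomic_compare_exchange_strong_explicit(
 *		    (_Atomic uint32_t *)lock_addr, &unlocked, 1,
 *		    memory_order_acquire, memory_order_relaxed)) {
 *			return -1;			// lock is held, caller will retry
 *		}
 *
 *		void *tail = list->opaque2;
 *		if (tail == NULL) {
 *			list->opaque1 = new;		// empty queue: head = new elem
 *		} else {
 *			*(void **)((char *)tail + offset) = new; // old tail's link = new elem
 *		}
 *		list->opaque2 = new;			// tail = new elem
 *
 *		atomic_store_explicit((_Atomic uint32_t *)lock_addr, 0,
 *		    memory_order_release);		// drop the spin lock
 *		return 0;
 *	}
 */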

/* Non-preemptible helper routine to FIFO dequeue:
 * void *pfz_trylock_and_dequeue(OSFifoQueueHead *__list, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Only caller save registers (x9 - x15) are used in this function
 *
 * Returns -1 on failure, and the pointer on success (can be NULL)
 */
	.globl _pfz_trylock_and_dequeue
	.align 2
_pfz_trylock_and_dequeue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	// Try to grab the lock
	mov		w10, wzr		 // unlock value = w10 = 0
	mov		w11, #1			 // locked value = w11 = 1

	casa	w10, w11, [x2]	 // Atomic CAS with acquire barrier
	cbz		w10, Ltrylock_dequeue_success

	mov		x0, #-1			// Failed
	b		Ltrylock_dequeue_exit

	/* We got the lock, dequeue the element */
Ltrylock_dequeue_success:
	ldr		x10, [x0]	 // x10 = head of the queue
	cbz		x10, Lreturn_head // if head is null, return

	ldr		x11, [x10, x1]	// get ptr to new head
	cbnz	x11, Lupdate_new_head // If new head != NULL, then not singleton. Only need to update head

	// Singleton case
	str		xzr, [x0, #8]	// dequeuing from singleton queue, update tail to NULL

Lupdate_new_head:
	str		xzr, [x10, x1]	// zero the link in the old head
	str		x11, [x0]		// Set up a new head

Lreturn_head:
	mov		x0, x10			// Move head to x0
	stlr	wzr, [x2]		// Drop spin lock with release barrier (pairs with acquire in casa)

Ltrylock_dequeue_exit:
	POP_FRAME
	ARM64_STACK_EPILOG
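
/* Likewise, an approximate C rendering of the dequeue helper above (sketch
 * only; returns (void *)-1 if the lock is contended, NULL if the queue is
 * empty, otherwise the dequeued element):
 *
 *	void *
 *	pfz_trylock_and_dequeue(OSFifoQueueHead *list, size_t offset,
 *	    uint32_t *lock_addr)
 *	{
 *		uint32_t unlocked = 0;
 *		if (!atomic_compare_exchange_strong_explicit(
 *		    (_Atomic uint32_t *)lock_addr, &unlocked, 1,
 *		    memory_order_acquire, memory_order_relaxed)) {
 *			return (void *)-1;		// lock is held, caller will retry
 *		}
 *
 *		void *head = list->opaque1;		// may be NULL (empty queue)
 *		if (head != NULL) {
 *			void *next = *(void **)((char *)head + offset);
 *			if (next == NULL) {
 *				list->opaque2 = NULL;	// singleton: tail becomes NULL too
 *			}
 *			*(void **)((char *)head + offset) = NULL; // zero the link in the old head
 *			list->opaque1 = next;		// new head (NULL if queue is now empty)
 *		}
 *
 *		atomic_store_explicit((_Atomic uint32_t *)lock_addr, 0,
 *		    memory_order_release);		// drop the spin lock
 *		return head;
 *	}
 */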


	/* Preemptible functions */
	.private_extern _commpage_text_preemptible_functions
_commpage_text_preemptible_functions:


/*
 * void pfz_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset);
 * x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 */
	.globl _pfz_enqueue

	.align 2
_pfz_enqueue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	str		xzr, [x1, x2]	// Zero the forward link in the new element
	mov		x15, xzr		// zero out the register used to communicate with kernel

	add		x3, x0, #16		// address of lock = x3 = x0 + 16
Lenqueue_trylock_loop:

	// Attempt #1
	TRYLOCK_ENQUEUE x9
	PREEMPT_SELF_THEN Lenqueue_determine_success

Lenqueue_determine_success:

	cbz		x9, Lenqueue_success // did we succeed? if so, exit

	ldxr	w9, [x3]		// arm the monitor for the lock address
	cbz		w9, Lenqueue_clear_monitor // lock is available, retry.

	wfe						// Wait with monitor armed

	// Attempt #2
	TRYLOCK_ENQUEUE x9
	cbz		x9, Lenqueue_take_delayed_preemption_upon_success  // did we succeed? if so, exit

	// We failed twice - backoff then try again

	BACKOFF x3
	b		Lenqueue_trylock_loop

Lenqueue_clear_monitor:
	clrex							// Pairs with the ldxr

	// Take a preemption if needed then branch to enqueue_trylock_loop
	PREEMPT_SELF_THEN Lenqueue_trylock_loop

Lenqueue_take_delayed_preemption_upon_success:
	PREEMPT_SELF_THEN Lenqueue_success

Lenqueue_success:
	POP_FRAME
	ARM64_STACK_EPILOG

/*
 * void *pfz_dequeue(OSFifoQueueHead *__list, size_t __offset);
 * x0 = pointer to queue head
 * x1 = offset of link field inside element
 *
 * This function is not in the PFZ but calls out to a helper which is in the PFZ
 * (_pfz_trylock_and_dequeue)
 */
	.globl	_pfz_dequeue
	.align 2
_pfz_dequeue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	mov		x15, xzr		// zero out the register used to communicate with kernel

	add		x2, x0, #16		// address of lock = x2 = x0 + 16
Ldequeue_trylock_loop:

	// Attempt #1
	TRYLOCK_DEQUEUE x9
	PREEMPT_SELF_THEN Ldequeue_determine_success

Ldequeue_determine_success:
	cmp		x9, #-1			// is result of dequeue == -1?
	b.ne	Ldequeue_success // no, we succeeded

	ldxr	w9, [x2]		// arm the monitor for the lock address
	cbz		w9, Ldequeue_clear_monitor // lock is available, retry.

	wfe						// Wait with monitor armed

	// Attempt #2
	TRYLOCK_DEQUEUE x9
	cmp		x9, #-1		// did we fail?
	b.ne	Ldequeue_take_delayed_preemption_upon_success // no, we succeeded

	// We failed twice - backoff then try again

	BACKOFF x2
	b	Ldequeue_trylock_loop

Ldequeue_take_delayed_preemption_upon_success:
	// We just got here after executing PFZ code, check if we need a preemption
	PREEMPT_SELF_THEN Ldequeue_success

Ldequeue_clear_monitor:
	clrex							// Pairs with the ldxr
	// Take a preemption if needed then branch to dequeue_trylock_loop.
	PREEMPT_SELF_THEN Ldequeue_trylock_loop

Ldequeue_success:
	mov		x0, x9		// Move x9 (where result was stored earlier) to x0
	POP_FRAME
	ARM64_STACK_EPILOG


/* void preempt_self(void)
 *
 * Make a syscall to take a preemption. This function is not in the PFZ.
 */
	.align 2
_preempt_self:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	// Save the registers that will be clobbered by the mach trap on the stack
	// and keep it 16 byte aligned
	stp		x0, x1, [sp, #-16]!

	// Note: We don't need to save the other caller-saved registers since the svc
	// will trigger an exception and the kernel will save and restore register state

	// Make syscall to take delayed preemption
	mov		x16, #-58	// -58 = pfz_exit
	svc		#0x80

	// Restore registers from stack
	ldp		x0, x1, [sp], #16

	POP_FRAME
	ARM64_STACK_EPILOG

/*
 *	void backoff(uint32_t *lock_addr);
 * The function returns when it observes that the lock has become available.
 * This function is not in the PFZ.
 *
 * x0 = lock address
 */
	.align 2
	.globl _backoff
_backoff:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	cbz		x15, Lno_preempt	// Kernel doesn't want to preempt us, jump to loop

	mov		x15, xzr	// zero out the preemption pending field
	bl		_preempt_self

Lno_preempt:
	ldxr	w9, [x0]		// Snoop on lock and arm the monitor
	cbz		w9, Lend_backoff // The lock seems to be available, return

	wfe						// pause

	b	Lno_preempt

Lend_backoff:
	clrex

	POP_FRAME
	ARM64_STACK_EPILOG