/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _KERN_LOCKS_INTERNAL_H_
#define _KERN_LOCKS_INTERNAL_H_

#define LOCK_PRIVATE 1
#include <sys/cdefs.h>
#include <stdint.h>
#include <kern/startup.h>
#include <kern/percpu.h>
#include <kern/lock_types.h>
#include <kern/lock_group.h>
#include <machine/locks.h>
#include <machine/cpu_number.h>
#include <os/atomic_private.h>

/*
 * This file contains implementation details shared between the XNU lock
 * implementations.  It is not meant to be included by any other part of
 * the code.
 */

__BEGIN_DECLS __ASSUME_PTR_ABI_SINGLE_BEGIN

#pragma GCC visibility push(hidden)

/*!
 * @macro hw_spin_wait_until_n()
 *
 * @brief
 * Abstracts the platform-specific way to spin on the value
 * of a memory location until a certain condition is met.
 *
 * @param count         how many times to spin without evaluating progress
 * @param ptr           the pointer to the memory location being observed
 * @param load_var      the variable to store the result of the load into
 * @param cond_expr     the stopping condition (can use @c load_var)
 *
 * @returns
 * - 0 if the loop stopped because the counter expired
 * - cond_expr's return value otherwise
 */
#define hw_spin_wait_until_n(count, ptr, load_var, cond_expr)  ({ \
	typeof((cond_expr)) __cond_result;                                      \
                                                                                \
	for (uint32_t __cond_init = (count), __cond_count = __cond_init;        \
	    __probable(__cond_count-- > 0);) {                                  \
	        __hw_spin_wait_load(ptr, load_var, __cond_result, cond_expr);   \
	        if (__probable(__cond_result)) {                                \
	                break;                                                  \
	        }                                                               \
	}                                                                       \
                                                                                \
	__cond_result;                                                          \
})

/*!
 * @macro hw_spin_wait_until()
 *
 * @brief
 * Convenience wrapper for hw_spin_wait_until_n() with the typical
 * LOCK_SNOOP_SPINS counter for progress evaluation.
 */
#define hw_spin_wait_until(ptr, load_var, cond_expr) \
	hw_spin_wait_until_n(LOCK_SNOOP_SPINS, ptr, load_var, cond_expr)
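
/*
 * Usage sketch (illustrative only): spinning until a hypothetical
 * one-word lock's "held" bit clears.  The `state` pointer and the
 * EXAMPLE_HELD bit are assumptions made up for this example, not part
 * of any lock in this file:
 *
 *	uint32_t v;
 *
 *	if (!hw_spin_wait_until(state, v, (v & EXAMPLE_HELD) == 0)) {
 *	        // the counter expired without the bit clearing;
 *	        // callers typically re-evaluate progress here
 *	}
 */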


#if LOCK_PRETEST
#define lck_pretestv(p, e, g)  ({ \
	__auto_type __e = (e); \
	__auto_type __v = os_atomic_load(p, relaxed); \
	if (__v != __e) { \
	        *(g) = __v; \
	} \
	__v == __e; \
})
#define lck_pretest(p, e) \
	(os_atomic_load(p, relaxed) == (e))
#else
#define lck_pretestv(p, e, g)   1
#define lck_pretest(p, e)       1
#endif

/*!
 * @function lock_cmpxchg
 *
 * @brief
 * Similar to os_atomic_cmpxchg() but with a pretest when LOCK_PRETEST is set.
 */
#define lock_cmpxchg(p, e, v, m)  ({ \
	__auto_type _p = (p);                                          \
	__auto_type _e = (e);                                          \
	lck_pretest(_p, _e) && os_atomic_cmpxchg(_p, _e, v, m);        \
})

/*!
 * @function lock_cmpxchgv
 *
 * @brief
 * Similar to os_atomic_cmpxchgv() but with a pretest when LOCK_PRETEST is set.
 */
#define lock_cmpxchgv(p, e, v, g, m)  ({ \
	__auto_type _p = (p);                                           \
	__auto_type _e = (e);                                           \
	lck_pretestv(_p, _e, g) && os_atomic_cmpxchgv(_p, _e, v, g, m); \
})
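
/*
 * Usage sketch (illustrative only): trying to take a hypothetical
 * one-word lock by flipping it from 0 to owned.  The `state` pointer
 * and EXAMPLE_OWNED value are assumptions for this example:
 *
 *	if (lock_cmpxchg(state, 0, EXAMPLE_OWNED, acquire)) {
 *	        // lock acquired; with LOCK_PRETEST, a relaxed load
 *	        // filtered out the contended case without an RMW
 *	}
 */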

#if OS_ATOMIC_HAS_LLSC
#define lock_load_exclusive(p, m)               os_atomic_load_exclusive(p, m)
#define lock_wait_for_event()                   wait_for_event()
#define lock_store_exclusive(p, ov, nv, m)      os_atomic_store_exclusive(p, nv, m)
#else
#define lock_load_exclusive(p, m)               os_atomic_load(p, relaxed)
#define lock_wait_for_event()                   cpu_pause()
#define lock_store_exclusive(p, ov, nv, m)      os_atomic_cmpxchg(p, ov, nv, m)
#endif
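
/*
 * These shims let a single acquisition loop serve both kinds of
 * hardware: on LL/SC architectures the exclusive load arms the wait,
 * elsewhere it degrades to a plain load plus compare-exchange.
 * A minimal sketch, assuming a one-word lock at `state` and a
 * hypothetical EXAMPLE_OWNED value:
 *
 *	uint32_t ov;
 *
 *	for (;;) {
 *	        ov = lock_load_exclusive(state, relaxed);
 *	        if (__probable(ov == 0)) {
 *	                if (lock_store_exclusive(state, ov,
 *	                    EXAMPLE_OWNED, acquire)) {
 *	                        break;          // acquired
 *	                }
 *	        } else {
 *	                lock_wait_for_event();  // WFE or PAUSE per arch
 *	        }
 *	}
 */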


/*!
 * @enum lck_type_t
 *
 * @brief
 * A one-byte type tag stored in byte 3 of locks so that they can be
 * identified.
 */
__enum_decl(lck_type_t, uint8_t, {
	LCK_TYPE_NONE           = 0x00,
	LCK_TYPE_MUTEX          = 0x22,
	LCK_TYPE_RW             = 0x33,
	LCK_TYPE_TICKET         = 0x44,
	LCK_TYPE_GATE           = 0x55,
});
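
/*
 * For example, a debugger or assertion could recover the tag of an
 * otherwise opaque lock this way (the `lock` pointer is hypothetical):
 *
 *	lck_type_t type = ((const uint8_t *)lock)[3];
 *
 *	assert(type == LCK_TYPE_MUTEX);
 */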


/*!
 * @typedef lck_mtx_mcs_t
 *
 * @brief
 * The type of per-cpu MCS-like nodes used for the mutex acquisition slowpath.
 *
 * @discussion
 * There is one such structure per CPU: the nodes are used with preemption
 * disabled, and using kernel mutexes in interrupt context isn't allowed.
 *
 * The nodes are used not as a lock as in traditional MCS, but to order
 * waiters.  The head of the queue spins against the lock itself, which
 * allows the MCS node to be released once the kernel mutex is acquired.
 *
 * These nodes provide 2 queues:
 *
 * 1. an adaptive spin queue that is used to order threads that choose to
 *    adaptively spin, waiting for the lock to become available.
 *
 *    This queue is doubly linked; threads can add themselves concurrently,
 *    but the interlock of the mutex is required to dequeue.
 *
 * 2. an interlock queue, which is a more typical MCS queue.
 */
typedef struct lck_mtx_mcs {
	struct _lck_mtx_       *lmm_ilk_current;

	struct lck_mtx_mcs     *lmm_ilk_next;
	unsigned long           lmm_ilk_ready;

	struct lck_mtx_mcs     *lmm_as_next;
	unsigned long long      lmm_as_prev;
} __attribute__((aligned(64))) * lck_mtx_mcs_t;


/*!
 * @typedef lck_spin_mcs_t
 *
 * @brief
 * The type of per-cpu MCS-like nodes used for various spinlock wait queues.
 *
 * @discussion
 * Unlike the mutex ones, these nodes can be used for spinlocks taken
 * in interrupt context.
 */
typedef struct lck_spin_mcs {
	struct lck_spin_mcs    *lsm_next;
	const void             *lsm_lock;
	unsigned long           lsm_ready;
} *lck_spin_mcs_t;


typedef struct lck_mcs {
	struct lck_mtx_mcs      mcs_mtx;
	volatile unsigned long  mcs_spin_rsv;
	struct lck_spin_mcs     mcs_spin[2];
} __attribute__((aligned(128))) * lck_mcs_t;


PERCPU_DECL(struct lck_mcs, lck_mcs);

typedef uint16_t lck_mcs_id_t;

#define LCK_MCS_ID_CPU_MASK     0x3fff
#define LCK_MCS_ID_SLOT_SHIFT       14
#define LCK_MCS_ID_SLOT_MASK    0xc000

#define LCK_MCS_SLOT_0               0
#define LCK_MCS_SLOT_1               1

static inline lck_mcs_id_t
lck_mcs_id_make(int cpu, unsigned long slot)
{
	return (uint16_t)(((slot + 1) << LCK_MCS_ID_SLOT_SHIFT) | cpu);
}

static inline lck_mcs_id_t
lck_mcs_id_current(unsigned long slot)
{
	return lck_mcs_id_make(cpu_number(), slot);
}

static inline uint16_t
lck_mcs_id_cpu(lck_mcs_id_t mcs_id)
{
	return mcs_id & LCK_MCS_ID_CPU_MASK;
}

static inline uint16_t
lck_mcs_id_slot(lck_mcs_id_t mcs_id)
{
	return (mcs_id >> LCK_MCS_ID_SLOT_SHIFT) - 1;
}
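
/*
 * Concretely: on CPU 5, slot LCK_MCS_SLOT_1 encodes as
 * ((1 + 1) << 14) | 5 == 0x8005, and decodes back to cpu 5, slot 1.
 * Slots are stored off by one so that a zero mcs_id can mean "none"
 * (see the empty-tail check in lck_spin_txn_enqueue() below).
 */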

static inline lck_mcs_t
lck_mcs_get_current(void)
{
	return PERCPU_GET(lck_mcs);
}

static inline lck_mcs_t
lck_mcs_get_other(lck_mcs_id_t mcs_id)
{
	vm_offset_t base = other_percpu_base(lck_mcs_id_cpu(mcs_id));

	return PERCPU_GET_WITH_BASE(base, lck_mcs);
}


static inline lck_spin_mcs_t
lck_spin_mcs_decode(lck_mcs_id_t mcs_id)
{
	lck_mcs_t other = lck_mcs_get_other(mcs_id);

	return &other->mcs_spin[lck_mcs_id_slot(mcs_id)];
}

typedef struct {
	lck_mcs_t               txn_mcs;
	lck_spin_mcs_t          txn_slot;
	lck_mcs_id_t            txn_mcs_id;
} lck_spin_txn_t;

static inline lck_spin_txn_t
lck_spin_txn_begin(void *lck)
{
	lck_spin_txn_t txn;
	unsigned long slot;

	txn.txn_mcs = lck_mcs_get_current();
	os_compiler_barrier();
	slot = txn.txn_mcs->mcs_spin_rsv++;
	assert(slot <= LCK_MCS_SLOT_1);
	os_compiler_barrier();

	txn.txn_mcs_id = lck_mcs_id_current(slot);
	txn.txn_slot = &txn.txn_mcs->mcs_spin[slot];
	txn.txn_slot->lsm_lock  = lck;

	return txn;
}

static inline bool
lck_spin_txn_enqueue(lck_spin_txn_t *txn, lck_mcs_id_t *tail)
{
	lck_spin_mcs_t  pnode;
	lck_mcs_id_t    pidx;

	pidx = os_atomic_xchg(tail, txn->txn_mcs_id, release);
	if (pidx) {
		pnode = lck_spin_mcs_decode(pidx);
		os_atomic_store(&pnode->lsm_next, txn->txn_slot, relaxed);
		return true;
	}

	return false;
}

static inline void
lck_spin_txn_end(lck_spin_txn_t *txn)
{
	unsigned long   slot = lck_mcs_id_slot(txn->txn_mcs_id);
	lck_mcs_t       mcs  = txn->txn_mcs;

	*txn->txn_slot = (struct lck_spin_mcs){ };
	*txn           = (lck_spin_txn_t){ };

	assert(mcs->mcs_spin_rsv == slot + 1);
	os_atomic_store(&mcs->mcs_spin_rsv, slot, compiler_acq_rel);
}
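
/*
 * Putting it together, a waiter-queue user would look roughly like the
 * sketch below.  The `lck` structure, its `tail` field, and the release
 * protocol (the predecessor setting our lsm_ready) are assumptions for
 * this example, not an actual lock implemented in this file:
 *
 *	lck_spin_txn_t txn = lck_spin_txn_begin(lck);
 *	unsigned long ready;
 *
 *	if (lck_spin_txn_enqueue(&txn, &lck->tail)) {
 *	        // we have a predecessor: wait for its hand-off
 *	        hw_spin_wait_until(&txn.txn_slot->lsm_ready, ready, ready);
 *	}
 *	// ... acquire the lock proper, hand off to txn.txn_slot->lsm_next ...
 *	lck_spin_txn_end(&txn);
 */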


#pragma GCC visibility pop

__ASSUME_PTR_ABI_SINGLE_END __END_DECLS

#endif /* _KERN_LOCKS_INTERNAL_H_ */