/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _KERN_LOCKS_INTERNAL_H_
#define _KERN_LOCKS_INTERNAL_H_

#define LOCK_PRIVATE 1
#include <sys/cdefs.h>
#include <stdint.h>
#include <kern/startup.h>
#include <kern/percpu.h>
#include <kern/lock_types.h>
#include <kern/lock_group.h>
#include <machine/cpu_number.h>
#include <machine/locks.h>
#include <machine/machine_cpu.h>
#include <os/atomic_private.h>

/*
 * This file contains implementation details shared between the XNU lock
 * implementations. It is not meant to be used by any other part of the code.
 */

__BEGIN_DECLS __ASSUME_PTR_ABI_SINGLE_BEGIN

#pragma GCC visibility push(hidden)

/*!
 * @macro hw_spin_wait_until_n()
 *
 * @brief
 * Abstracts the platform-specific way to spin around the value
 * of a memory location until a certain condition is met.
 *
 * @param count         how many times to spin without evaluating progress
 * @param ptr           the pointer to the memory location being observed
 * @param load_var      the variable to store the result of the load into
 * @param cond_expr     the stopping condition (can use @c load_var)
 *
 * @returns
 * - 0 if the loop stopped when the counter expired
 * - cond_expr's return value otherwise
 */
#define hw_spin_wait_until_n(count, ptr, load_var, cond_expr)  ({ \
	typeof((cond_expr)) __cond_result;                                      \
                                                                                \
	for (uint32_t __cond_init = (count), __cond_count = __cond_init;        \
	    __probable(__cond_count-- > 0);) {                                  \
	        __hw_spin_wait_load(ptr, load_var, __cond_result, cond_expr);   \
	        if (__probable(__cond_result)) {                                \
	                break;                                                  \
	        }                                                               \
	}                                                                       \
                                                                                \
	__cond_result;                                                          \
})

/*!
 * @macro hw_spin_wait_until()
 *
 * @brief
 * Convenience wrapper for hw_spin_wait_until_n() with the typical
 * LOCK_SNOOP_SPINS counter for progress evaluation.
 */
#define hw_spin_wait_until(ptr, load_var, cond_expr) \
	hw_spin_wait_until_n(LOCK_SNOOP_SPINS, ptr, load_var, cond_expr)
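
/*
 * Illustrative sketch (not part of the original header): a caller that
 * spins until a hypothetical lock word drains to zero. The macro yields
 * 0 when the LOCK_SNOOP_SPINS budget expires before the condition holds,
 * so the result doubles as a "did the condition hold" flag; the _n
 * variant takes an explicit spin budget instead.
 */
#if 0 /* example only, never compiled */
static bool
example_wait_for_unlock(uint32_t *word)
{
	uint32_t v;

	return hw_spin_wait_until(word, v, v == 0);
}
#endif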


#if LOCK_PRETEST
#define lck_pretestv(p, e, g)  ({ \
	__auto_type __e = (e); \
	__auto_type __v = os_atomic_load(p, relaxed); \
	if (__v != __e) { \
	        *(g) = __v; \
	} \
	__v == __e; \
})
#define lck_pretest(p, e) \
	(os_atomic_load(p, relaxed) == (e))
#else
#define lck_pretestv(p, e, g)   1
#define lck_pretest(p, e)       1
#endif

/*!
 * @function lock_cmpxchg
 *
 * @brief
 * Similar to os_atomic_cmpxchg() but with a pretest when LOCK_PRETEST is set.
 */
#define lock_cmpxchg(p, e, v, m)  ({ \
	__auto_type _p = (p);                                          \
	__auto_type _e = (e);                                          \
	lck_pretest(_p, _e) && os_atomic_cmpxchg(_p, _e, v, m);        \
})

/*!
 * @function lock_cmpxchgv
 *
 * @brief
 * Similar to os_atomic_cmpxchgv() but with a pretest when LOCK_PRETEST is set.
 */
#define lock_cmpxchgv(p, e, v, g, m)  ({ \
	__auto_type _p = (p);                                           \
	__auto_type _e = (e);                                           \
	lck_pretestv(_p, _e, g) && os_atomic_cmpxchgv(_p, _e, v, g, m); \
})
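
/*
 * Illustrative sketch (hypothetical caller, not from this file): a
 * try-acquire of a one-word lock. With LOCK_PRETEST set, the relaxed
 * pretest load filters out compare-exchanges that are doomed to fail
 * before they take the cache line exclusive.
 */
#if 0 /* example only, never compiled */
static bool
example_try_lock(uint32_t *word)
{
	return lock_cmpxchg(word, 0, 1, acquire);
}
#endif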
133 
134 #if OS_ATOMIC_HAS_LLSC
135 #define lock_load_exclusive(p, m)               os_atomic_load_exclusive(p, m)
136 #define lock_wait_for_event()                   wait_for_event()
137 #define lock_store_exclusive(p, ov, nv, m)      os_atomic_store_exclusive(p, nv, m)
138 #else
139 #define lock_load_exclusive(p, m)               os_atomic_load(p, relaxed)
140 #define lock_wait_for_event()                   cpu_pause()
141 #define lock_store_exclusive(p, ov, nv, m)      os_atomic_cmpxchg(p, ov, nv, m)
142 #endif
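
/*
 * Illustrative sketch (not part of the original header): the retry loop
 * these wrappers are shaped for. On LL/SC hardware the load-exclusive
 * arms the monitor and lock_wait_for_event() waits for the line to
 * change; on cmpxchg-only hardware the same source degrades to a relaxed
 * load, cpu_pause() and a compare-exchange (where the RMW supplies the
 * ordering).
 */
#if 0 /* example only, never compiled */
static void
example_lock_word(uint32_t *word)
{
	uint32_t ov;

	for (;;) {
		ov = lock_load_exclusive(word, acquire);
		if (__improbable(ov != 0)) {
			/* locked: wait for the word to change, then retry */
			lock_wait_for_event();
			continue;
		}
		if (__probable(lock_store_exclusive(word, ov, 1, relaxed))) {
			break;
		}
	}
}
#endif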


/*!
 * @enum lck_type_t
 *
 * @brief
 * A one-byte type tag stored in byte 3 of locks, used to identify them.
 */
__enum_decl(lck_type_t, uint8_t, {
	LCK_TYPE_NONE           = 0x00,
	LCK_TYPE_MUTEX          = 0x22,
	LCK_TYPE_RW             = 0x33,
	LCK_TYPE_TICKET         = 0x44,
	LCK_TYPE_GATE           = 0x55,
});
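
/*
 * Illustrative sketch (not part of the original interface): reading the
 * tag back out of an arbitrary lock, e.g. from a debugger or panic path.
 */
#if 0 /* example only, never compiled */
static lck_type_t
example_lock_type(const void *lock)
{
	/* byte 3 of every lock holds its lck_type_t tag */
	return (lck_type_t)((const uint8_t *)lock)[3];
}
#endif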


/*!
 * @typedef lck_mtx_mcs_t
 *
 * @brief
 * The type of per-cpu MCS-like nodes used for the mutex acquisition slowpath.
 *
 * @discussion
 * There is one such structure per CPU: such nodes are used with preemption
 * disabled, and using kernel mutexes in interrupt context isn't allowed.
 *
 * The nodes are used not as a lock as in traditional MCS, but to order
 * waiters. The head of the queue spins against the lock itself, which allows
 * the MCS node to be released once the kernel mutex is acquired.
 *
 * These nodes provide two queues:
 *
 * 1. an adaptive spin queue that is used to order threads that chose to
 *    adaptively spin while waiting for the lock to become available.
 *
 *    This queue is doubly linked; threads can add themselves concurrently,
 *    but the interlock of the mutex is required to dequeue.
 *
 * 2. an interlock queue, which is a more typical MCS queue.
 */
typedef struct lck_mtx_mcs {
	struct lck_mtx_s       *lmm_ilk_current;

	struct lck_mtx_mcs     *lmm_ilk_next;
	unsigned long           lmm_ilk_ready;

	struct lck_mtx_mcs     *lmm_as_next;
	unsigned long long      lmm_as_prev;
} __attribute__((aligned(64))) * lck_mtx_mcs_t;


/*!
 * @typedef lck_spin_mcs_t
 *
 * @brief
 * The type of per-cpu MCS-like nodes used for various spinlock wait queues.
 *
 * @discussion
 * Unlike the mutex ones, these nodes can be used for spinlocks taken
 * in interrupt context.
 */
typedef struct lck_spin_mcs {
	struct lck_spin_mcs    *lsm_next;
	const void             *lsm_lock;
	unsigned long           lsm_ready;
} *lck_spin_mcs_t;


typedef struct lck_mcs {
	struct lck_mtx_mcs      mcs_mtx;
	volatile unsigned long  mcs_spin_rsv;
	struct lck_spin_mcs     mcs_spin[2];
} __attribute__((aligned(128))) * lck_mcs_t;


PERCPU_DECL(struct lck_mcs, lck_mcs);

typedef uint16_t lck_mcs_id_t;

#define LCK_MCS_ID_CPU_MASK     0x3fff
#define LCK_MCS_ID_SLOT_SHIFT       14
#define LCK_MCS_ID_SLOT_MASK    0xc000

#define LCK_MCS_SLOT_0               0
#define LCK_MCS_SLOT_1               1

static inline lck_mcs_id_t
lck_mcs_id_make(int cpu, unsigned long slot)
{
	return (uint16_t)(((slot + 1) << LCK_MCS_ID_SLOT_SHIFT) | cpu);
}

static inline lck_mcs_id_t
lck_mcs_id_current(unsigned long slot)
{
	return lck_mcs_id_make(cpu_number(), slot);
}

static inline uint16_t
lck_mcs_id_cpu(lck_mcs_id_t mcs_id)
{
	return mcs_id & LCK_MCS_ID_CPU_MASK;
}

static inline uint16_t
lck_mcs_id_slot(lck_mcs_id_t mcs_id)
{
	return (mcs_id >> LCK_MCS_ID_SLOT_SHIFT) - 1;
}
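
/*
 * Illustrative sketch (not from the original source): the ID encoding
 * round-trips, and because the slot is stored biased by one in the top
 * two bits, a valid ID is never 0, even for CPU 0 / slot 0. 0 can thus
 * mean "no node", which lck_spin_txn_enqueue() relies on below.
 */
#if 0 /* example only, never compiled */
static void
example_mcs_id_roundtrip(void)
{
	lck_mcs_id_t id = lck_mcs_id_make(3, LCK_MCS_SLOT_1);

	assert(id != 0);
	assert(lck_mcs_id_cpu(id) == 3);
	assert(lck_mcs_id_slot(id) == LCK_MCS_SLOT_1);
}
#endif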

static inline lck_mcs_t
lck_mcs_get_current(void)
{
	return PERCPU_GET(lck_mcs);
}

static inline lck_mcs_t
lck_mcs_get_other(lck_mcs_id_t mcs_id)
{
	vm_offset_t base = other_percpu_base(lck_mcs_id_cpu(mcs_id));

	return PERCPU_GET_WITH_BASE(base, lck_mcs);
}


static inline lck_spin_mcs_t
lck_spin_mcs_decode(lck_mcs_id_t mcs_id)
{
	lck_mcs_t other = lck_mcs_get_other(mcs_id);

	return &other->mcs_spin[lck_mcs_id_slot(mcs_id)];
}

typedef struct {
	lck_mcs_t               txn_mcs;
	lck_spin_mcs_t          txn_slot;
	lck_mcs_id_t            txn_mcs_id;
} lck_spin_txn_t;

static inline lck_spin_txn_t
lck_spin_txn_begin(void *lck)
{
	lck_spin_txn_t txn;
	unsigned long slot;

	txn.txn_mcs = lck_mcs_get_current();
	os_compiler_barrier();
	slot = txn.txn_mcs->mcs_spin_rsv++;
	assert(slot <= LCK_MCS_SLOT_1);
	os_compiler_barrier();

	txn.txn_mcs_id = lck_mcs_id_current(slot);
	txn.txn_slot = &txn.txn_mcs->mcs_spin[slot];
	txn.txn_slot->lsm_lock = lck;

	return txn;
}

static inline bool
lck_spin_txn_enqueue(lck_spin_txn_t *txn, lck_mcs_id_t *tail)
{
	lck_spin_mcs_t  pnode;
	lck_mcs_id_t    pidx;

	pidx = os_atomic_xchg(tail, txn->txn_mcs_id, release);
	if (pidx) {
		pnode = lck_spin_mcs_decode(pidx);
		os_atomic_store(&pnode->lsm_next, txn->txn_slot, relaxed);
		return true;
	}

	return false;
}

static inline void
lck_spin_txn_end(lck_spin_txn_t *txn)
{
	unsigned long   slot = lck_mcs_id_slot(txn->txn_mcs_id);
	lck_mcs_t       mcs  = txn->txn_mcs;

	*txn->txn_slot = (struct lck_spin_mcs){ };
	*txn           = (lck_spin_txn_t){ };

	assert(mcs->mcs_spin_rsv == slot + 1);
	os_atomic_store(&mcs->mcs_spin_rsv, slot, compiler_acq_rel);
}
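
/*
 * Illustrative sketch (hypothetical, heavily simplified): how the
 * transaction helpers above compose into an MCS-style wait queue.
 * `tail` would live in the lock being contended; the handoff that sets
 * the successor's lsm_ready is elided here and lives in the real lock
 * implementations.
 */
#if 0 /* example only, never compiled */
static void
example_spin_queue_enter(void *lck, lck_mcs_id_t *tail)
{
	lck_spin_txn_t  txn = lck_spin_txn_begin(lck);
	unsigned long   ready;

	if (lck_spin_txn_enqueue(&txn, tail)) {
		/* we had a predecessor: spin until it marks us ready */
		while (!hw_spin_wait_until(&txn.txn_slot->lsm_ready,
		    ready, ready != 0)) {
			continue;
		}
	}

	/* ... acquire the lock proper, hand off to any successor ... */

	lck_spin_txn_end(&txn);
}
#endif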


#pragma GCC visibility pop

__ASSUME_PTR_ABI_SINGLE_END __END_DECLS

#endif /* _KERN_LOCKS_INTERNAL_H_ */