/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>
#include <kern/locks_internal.h>
#include <kern/lock_stat.h>
#include <kern/lock_ptr.h>

#include <mach/mach_time.h>
#include <mach/machine/sdt.h>
#include <mach/vm_param.h>

#include <machine/cpu_data.h>
#include <machine/machine_cpu.h>


#pragma mark hw_lck_ptr_t: helpers

static_assert(VM_KERNEL_POINTER_SIGNIFICANT_BITS < HW_LCK_PTR_BITS,
    "sign extension of lck_ptr_bits does the right thing");

static inline void
__hw_lck_ptr_encode(hw_lck_ptr_t *lck, const void *ptr)
{
	lck->lck_ptr_bits = (intptr_t)ptr;
#if CONFIG_KERNEL_TAGGING
	lck->lck_ptr_tag  = vm_memtag_extract_tag((vm_offset_t)ptr);
#endif /* CONFIG_KERNEL_TAGGING */
}
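
/*
 * Illustrative sketch of the round trip ("elem" is an arbitrary kernel heap
 * pointer; __hw_lck_ptr_value() is declared in lock_ptr.h and, per the
 * static_assert above, is assumed to recover the pointer by sign-extending
 * lck_ptr_bits):
 *
 *	hw_lck_ptr_t tmp = { };
 *	__hw_lck_ptr_encode(&tmp, elem);            // keeps the low HW_LCK_PTR_BITS bits
 *	assert(__hw_lck_ptr_value(tmp) == elem);    // sign extension restores the rest
 *
 * Kernel pointers only carry VM_KERNEL_POINTER_SIGNIFICANT_BITS significant
 * bits, so truncating to the bitfield loses no information.
 */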

__abortlike
static void
__hw_lck_ptr_invalid_panic(hw_lck_ptr_t *lck)
{
	hw_lck_ptr_t tmp = os_atomic_load(lck, relaxed);

	panic("Invalid/destroyed ptr spinlock %p: <%p %d 0x%04x>",
	    lck, __hw_lck_ptr_value(tmp), tmp.lck_ptr_locked,
	    tmp.lck_ptr_mcs_tail);
}

__attribute__((always_inline, overloadable))
static inline bool
hw_lck_ptr_take_slowpath(hw_lck_ptr_t tmp)
{
	hw_lck_ptr_t check_bits = {
#if CONFIG_DTRACE
		.lck_ptr_stats  = true,
#endif /* CONFIG_DTRACE */
	};
	unsigned long take_slowpath = 0;

	take_slowpath = tmp.lck_ptr_value & check_bits.lck_ptr_value;
#if CONFIG_DTRACE
	take_slowpath |= lockstat_enabled();
#endif /* CONFIG_DTRACE */
	return take_slowpath;
}


#pragma mark hw_lck_ptr_t: init/destroy

void
hw_lck_ptr_init(hw_lck_ptr_t *lck, void *val, lck_grp_t *grp)
{
	hw_lck_ptr_t init = { };

#if LCK_GRP_USE_ARG
	if (grp) {
#if CONFIG_DTRACE
		if (grp->lck_grp_attr_id & LCK_GRP_ATTR_STAT) {
			init.lck_ptr_stats = true;
		}
#endif /* CONFIG_DTRACE */
		lck_grp_reference(grp, &grp->lck_grp_spincnt);
	}
#endif /* LCK_GRP_USE_ARG */

	__hw_lck_ptr_encode(&init, val);
	os_atomic_init(lck, init);
}

void
hw_lck_ptr_destroy(hw_lck_ptr_t *lck, lck_grp_t *grp)
{
	hw_lck_ptr_t tmp = os_atomic_load(lck, relaxed);

	if (tmp.lck_ptr_locked || tmp.lck_ptr_mcs_tail) {
		__hw_lck_ptr_invalid_panic(lck);
	}
#if LCK_GRP_USE_ARG
	if (grp) {
		lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
	}
#endif /* LCK_GRP_USE_ARG */

	/* make clients spin forever, and use an invalid MCS ID */
	tmp.lck_ptr_locked   = true;
	tmp.lck_ptr_stats    = false;
	tmp.lck_ptr_mcs_tail = 0xffff;
	os_atomic_store(lck, tmp, relaxed);
}

bool
hw_lck_ptr_held(hw_lck_ptr_t *lck)
{
	return os_atomic_load(lck, relaxed).lck_ptr_locked;
}

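/*
 * Minimal usage sketch: a hw_lck_ptr_t is both a spinlock and the pointer it
 * protects.  Locking returns the current pointer; unlocking publishes a
 * (possibly different) pointer in the same atomic word.  The names "head",
 * "my_grp", "first_elem" and "new_elem" below are purely illustrative:
 *
 *	hw_lck_ptr_t head;
 *
 *	hw_lck_ptr_init(&head, first_elem, &my_grp);
 *
 *	void *cur = hw_lck_ptr_lock(&head, &my_grp);   // disables preemption
 *	// ... inspect or edit the structure hanging off cur ...
 *	hw_lck_ptr_unlock(&head, new_elem, &my_grp);   // swaps the pointer on release
 *
 *	hw_lck_ptr_destroy(&head, &my_grp);
 */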

#pragma mark hw_lck_ptr_t: hw_lck_ptr_lock

__abortlike
static hw_spin_timeout_status_t
hw_lck_ptr_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lck_ptr_t *lck = _lock;
	hw_lck_ptr_t tmp;

	tmp  = os_atomic_load(lck, relaxed);
	panic("Ptr spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "ptr_value: %p, mcs_tail: 0x%04x, "
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lck, HW_SPIN_TIMEOUT_ARG(to, st),
	    __hw_lck_ptr_value(tmp), tmp.lck_ptr_mcs_tail,
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_lck_ptr_spin_policy = {
	.hwsp_name              = "hw_lck_ptr_lock",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_lck_ptr_timeout_panic,
};


static void * __attribute__((noinline))
hw_lck_ptr_contended(hw_lck_ptr_t *lck LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_policy_t  pol = &hw_lck_ptr_spin_policy;
	hw_spin_timeout_t to  = hw_spin_compute_timeout(pol);
	hw_spin_state_t   ss  = { };

	hw_lck_ptr_t      value, nvalue;
	lck_mcs_id_t      pidx;
	lck_spin_txn_t    txn;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t          spin_start = 0;

	lck_grp_spin_update_miss(lck LCK_GRP_ARG(grp));
	if (__improbable(lck_grp_spin_spin_enabled(lck LCK_GRP_ARG(grp)))) {
		spin_start = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	/*
	 *	Take a spot in the MCS queue,
	 *	and then spin until we're at the head of it.
	 */

	txn = lck_spin_txn_begin(lck);

	pidx = os_atomic_xchg(&lck->lck_ptr_mcs_tail, txn.txn_mcs_id, release);
	if (pidx) {
		lck_spin_mcs_t pnode;
		unsigned long ready;

		pnode = lck_spin_mcs_decode(pidx);
		os_atomic_store(&pnode->lsm_next, txn.txn_slot, relaxed);

		while (!hw_spin_wait_until(&txn.txn_slot->lsm_ready, ready, ready)) {
			hw_spin_should_keep_spinning(lck, pol, to, &ss);
		}
	}

	/*
	 *	We're now the first in line, wait for the lock bit
	 *	to look ready and take it.
	 */
	do {
		while (!hw_spin_wait_until(&lck->lck_ptr_value,
		    value.lck_ptr_value, value.lck_ptr_locked == 0)) {
			hw_spin_should_keep_spinning(lck, pol, to, &ss);
		}

		nvalue = value;
		nvalue.lck_ptr_locked = true;
		if (nvalue.lck_ptr_mcs_tail == txn.txn_mcs_id) {
			nvalue.lck_ptr_mcs_tail = 0;
		}
	} while (!os_atomic_cmpxchg(lck, value, nvalue, acquire));

	/*
	 *	We now have the lock, let's clean up the MCS state.
	 *
	 *	If there is a node after us, notify it that it is now
	 *	at the head of the interlock queue.
	 *
	 *	Then, clear the MCS node.
	 */
	if (value.lck_ptr_mcs_tail != txn.txn_mcs_id) {
		lck_spin_mcs_t nnode;

		while (!hw_spin_wait_until(&txn.txn_slot->lsm_next, nnode, nnode)) {
			hw_spin_should_keep_spinning(lck, pol, to, &ss);
		}

		os_atomic_store(&nnode->lsm_ready, 1, relaxed);
	}

	lck_spin_txn_end(&txn);

#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(spin_start)) {
		lck_grp_spin_update_spin(lck LCK_GRP_ARG(grp),
		    mach_absolute_time() - spin_start);
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	return __hw_lck_ptr_value(value);
}
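
/*
 * Illustrative timeline of the queueing discipline above, for two CPUs A and
 * B racing on the same lock (a sketch, not an exhaustive description):
 *
 *  A: fastpath OR sets lck_ptr_locked and wins the lock.
 *  B: fastpath sees lck_ptr_locked set and calls hw_lck_ptr_contended();
 *     the xchg installs B's MCS id as lck_ptr_mcs_tail.  pidx == 0, so B is
 *     already at the head of the queue and spins on lck_ptr_locked.
 *  A: hw_lck_ptr_unlock() clears lck_ptr_locked; lck_ptr_mcs_tail is
 *     deliberately left untouched by the XOR.
 *  B: the cmpxchg re-asserts lck_ptr_locked and, since B is still the tail,
 *     clears lck_ptr_mcs_tail before releasing its MCS node.
 *
 * A third waiter would instead see pidx != 0, link its slot behind the
 * previous tail's node and spin on its own lsm_ready flag, which the
 * predecessor sets once it has taken the lock.
 */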

#if CONFIG_DTRACE
__attribute__((noinline))
#else /* !CONFIG_DTRACE */
__attribute__((always_inline))
#endif /* !CONFIG_DTRACE */
static void *
hw_lck_ptr_lock_slow(
	hw_lck_ptr_t           *lck,
	hw_lck_ptr_t            tmp
	LCK_GRP_ARG(lck_grp_t  *grp))
{
	lck_grp_spin_update_held(lck LCK_GRP_ARG(grp));
	return __hw_lck_ptr_value(tmp);
}

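/*
 * The fastpath below avoids a CAS: it only attempts the atomic OR of the
 * lock bit after a relaxed load shows the lock free with an empty MCS tail,
 * and the OR returns the previous word, which both tells us whether we
 * really won the race (lck_ptr_locked was still 0) and already carries the
 * pointer payload to hand back to the caller.
 */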
static inline void *
hw_lck_ptr_lock_fastpath(hw_lck_ptr_t *lck LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_lck_ptr_t lock_bit = { .lck_ptr_locked = 1 };
	hw_lck_ptr_t tmp;

	tmp = os_atomic_load(lck, relaxed);
	if (__probable(tmp.lck_ptr_locked == 0 && tmp.lck_ptr_mcs_tail == 0)) {
		tmp.lck_ptr_value = os_atomic_or_orig(&lck->lck_ptr_value,
		    lock_bit.lck_ptr_value, acquire);
		if (__probable(tmp.lck_ptr_locked == 0)) {
			if (__probable(!hw_lck_ptr_take_slowpath(tmp))) {
				return __hw_lck_ptr_value(tmp);
			}
			return hw_lck_ptr_lock_slow(lck, tmp LCK_GRP_ARG(grp));
		}
	}

	return hw_lck_ptr_contended(lck LCK_GRP_ARG(grp));
}

void *
hw_lck_ptr_lock_nopreempt(hw_lck_ptr_t *lck, lck_grp_t *grp)
{
	return hw_lck_ptr_lock_fastpath(lck LCK_GRP_ARG(grp));
}

void *
hw_lck_ptr_lock(hw_lck_ptr_t *lck, lck_grp_t *grp)
{
	lock_disable_preemption_for_thread(current_thread());
	return hw_lck_ptr_lock_fastpath(lck LCK_GRP_ARG(grp));
}



#pragma mark hw_lck_ptr_t: hw_lck_ptr_unlock

#if CONFIG_DTRACE
__attribute__((noinline))
static void
hw_lck_ptr_unlock_slow(
	hw_lck_ptr_t           *lck,
	bool                    do_preempt
	LCK_GRP_ARG(lck_grp_t  *grp))
{
	if (do_preempt) {
		lock_enable_preemption();
	}
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lck,
	    (uintptr_t)LCK_GRP_PROBEARG(grp));
}
#endif /* CONFIG_DTRACE */

static inline void
hw_lck_ptr_unlock_fastpath(
	hw_lck_ptr_t           *lck,
	void                   *val,
	bool                    do_preempt
	LCK_GRP_ARG(lck_grp_t  *grp))
{
	hw_lck_ptr_t curv;
	hw_lck_ptr_t xorv = { };

	/*
	 * Compute the value to XOR in order to unlock and change the pointer
	 * value, while leaving lck_ptr_stats and lck_ptr_mcs_tail unmodified
	 * (the latter might change while we unlock; this avoids a CAS loop).
	 */
	curv = atomic_load_explicit((hw_lck_ptr_t _Atomic *)lck,
	    memory_order_relaxed);

	curv.lck_ptr_stats = false;
	curv.lck_ptr_mcs_tail = 0;

	__hw_lck_ptr_encode(&xorv, val);
	xorv.lck_ptr_value ^= curv.lck_ptr_value;

	curv.lck_ptr_value =
	    os_atomic_xor(&lck->lck_ptr_value, xorv.lck_ptr_value, release);

#if CONFIG_DTRACE
	if (__improbable(hw_lck_ptr_take_slowpath(curv))) {
		return hw_lck_ptr_unlock_slow(lck, do_preempt LCK_GRP_ARG(grp));
	}
#endif /* CONFIG_DTRACE */

	if (do_preempt) {
		lock_enable_preemption();
	}
}
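
/*
 * Worked example of the XOR trick above (illustrative values): suppose the
 * word currently holds <ptr=A, locked=1, stats=S, mcs_tail=T> and we release
 * with pointer B.  The mask is
 *
 *	<ptr=A, locked=1, stats=0, mcs_tail=0> ^ <ptr=B, locked=0, ...>
 *
 * i.e. it flips exactly the pointer bits that differ between A and B plus
 * the locked bit, so the single atomic XOR publishes <ptr=B, locked=0> while
 * a concurrent waiter's update of lck_ptr_mcs_tail (and the stats bit)
 * survives untouched.
 */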

void
hw_lck_ptr_unlock_nopreempt(hw_lck_ptr_t *lck, void *val, lck_grp_t *grp)
{
	hw_lck_ptr_unlock_fastpath(lck, val, false LCK_GRP_ARG(grp));
}

void
hw_lck_ptr_unlock(hw_lck_ptr_t *lck, void *val, lck_grp_t *grp)
{
	hw_lck_ptr_unlock_fastpath(lck, val, true LCK_GRP_ARG(grp));
}



#pragma mark hw_lck_ptr_t: hw_lck_ptr_wait_for_value

static void __attribute__((noinline))
hw_lck_ptr_wait_for_value_contended(
	hw_lck_ptr_t           *lck,
	void                   *val
	LCK_GRP_ARG(lck_grp_t  *grp))
{
	hw_spin_policy_t  pol = &hw_lck_ptr_spin_policy;
	hw_spin_timeout_t to  = hw_spin_compute_timeout(pol);
	hw_spin_state_t   ss  = { };
	hw_lck_ptr_t      tmp;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t          spin_start = 0;

	if (__improbable(lck_grp_spin_spin_enabled(lck LCK_GRP_ARG(grp)))) {
		spin_start = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	while (__improbable(!hw_spin_wait_until(&lck->lck_ptr_value,
	    tmp.lck_ptr_value, __hw_lck_ptr_value(tmp) == val))) {
		hw_spin_should_keep_spinning(lck, pol, to, &ss);
	}

#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(spin_start)) {
		lck_grp_spin_update_spin(lck LCK_GRP_ARG(grp),
		    mach_absolute_time() - spin_start);
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	os_atomic_thread_fence(acquire);
}

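/*
 * Minimal usage sketch (an assumed pattern, not taken from this file): spin,
 * without taking the lock, until another thread has published "sentinel" as
 * the pointer payload, typically via hw_lck_ptr_unlock(lck, sentinel, grp).
 * The acquire ordering on the way out makes everything that thread did
 * before its release-unlock visible to the waiter:
 *
 *	hw_lck_ptr_wait_for_value(&head, sentinel, &my_grp);
 *	// state published before the matching unlock is now visible
 */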
void
hw_lck_ptr_wait_for_value(
	hw_lck_ptr_t           *lck,
	void                   *val,
	lck_grp_t              *grp)
{
	hw_lck_ptr_t tmp = os_atomic_load(lck, acquire);

	if (__probable(__hw_lck_ptr_value(tmp) == val)) {
		return;
	}

	hw_lck_ptr_wait_for_value_contended(lck, val LCK_GRP_ARG(grp));
}