1 /*
2 * Copyright (c) 2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 #define LOCK_PRIVATE 1
57 #include <debug.h>
58 #include <kern/locks_internal.h>
59 #include <kern/lock_stat.h>
60 #include <kern/locks.h>
61 #include <kern/zalloc.h>
62 #include <kern/thread.h>
63 #include <kern/processor.h>
64 #include <kern/sched_prim.h>
65 #include <kern/debug.h>
66 #include <machine/atomic.h>
67 #include <machine/machine_cpu.h>
68
69 KALLOC_TYPE_DEFINE(KT_LCK_RW, lck_rw_t, KT_PRIV_ACCT);
70
71 #define LCK_RW_WRITER_EVENT(lck) (event_t)((uintptr_t)(lck)+1)
72 #define LCK_RW_READER_EVENT(lck) (event_t)((uintptr_t)(lck)+2)
73 #define WRITE_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-1))
74 #define READ_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-2))
75
76 #if CONFIG_DTRACE
77 #define DTRACE_RW_SHARED 0x0 //reader
78 #define DTRACE_RW_EXCL 0x1 //writer
79 #define DTRACE_NO_FLAG 0x0 //not applicable
80 #endif /* CONFIG_DTRACE */
81
82 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
83 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
84 #define LCK_RW_LCK_SHARED_CODE 0x102
85 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
86 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
87 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
88
89 #if __x86_64__
90 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
91 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
92 #define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
93 #define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
94 #define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
95 #define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
96 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
97 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
98 #endif
99
100 #define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
101 #define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
102
103 #define ordered_load_rw(lock) os_atomic_load(&(lock)->lck_rw_data, compiler_acq_rel)
104 #define ordered_store_rw(lock, value) os_atomic_store(&(lock)->lck_rw_data, (value), compiler_acq_rel)
105 #define ordered_store_rw_owner(lock, value) os_atomic_store(&(lock)->lck_rw_owner, (value), compiler_acq_rel)
106
107 #ifdef DEBUG_RW
108 static TUNABLE(bool, lck_rw_recursive_shared_assert_74048094, "lck_rw_recursive_shared_assert", false);
109 SECURITY_READ_ONLY_EARLY(vm_packing_params_t) rwlde_caller_packing_params =
110 VM_PACKING_PARAMS(LCK_RW_CALLER_PACKED);
111 #define rw_lock_debug_disabled() (lck_opts_get() & LCK_OPTION_DISABLE_RW_DEBUG)
112
113 #define set_rwlde_caller_packed(entry, caller) ((entry)->rwlde_caller_packed = VM_PACK_POINTER((vm_offset_t)caller, LCK_RW_CALLER_PACKED))
114 #define get_rwlde_caller(entry) ((void*)VM_UNPACK_POINTER(entry->rwlde_caller_packed, LCK_RW_CALLER_PACKED))
115
116 #endif /* DEBUG_RW */
117
118 /*!
119 * @function lck_rw_alloc_init
120 *
121 * @abstract
122 * Allocates and initializes a rw_lock_t.
123 *
124 * @discussion
125 * The function can block. See lck_rw_init() for initialization details.
126 *
127 * @param grp lock group to associate with the lock.
128 * @param attr lock attribute to initialize the lock.
129 *
130 * @returns NULL or the allocated lock
131 */
132 lck_rw_t *
133 lck_rw_alloc_init(
134 lck_grp_t *grp,
135 lck_attr_t *attr)
136 {
137 lck_rw_t *lck;
138
139 lck = zalloc_flags(KT_LCK_RW, Z_WAITOK | Z_ZERO);
140 lck_rw_init(lck, grp, attr);
141 return lck;
142 }
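/*
 * Usage sketch (illustration only, not part of this file): a typical
 * allocate/use/free cycle with lck_rw_alloc_init() and lck_rw_free().
 * The group name and the example_* identifiers are hypothetical.
 */
#if 0   /* usage sketch, not compiled */
static void
example_rw_alloc_free(void)
{
	lck_grp_t *grp  = lck_grp_alloc_init("com.example.rwlock", LCK_GRP_ATTR_NULL);
	lck_rw_t  *lock = lck_rw_alloc_init(grp, LCK_ATTR_NULL);

	lck_rw_lock_exclusive(lock);
	/* ... mutate the protected state ... */
	lck_rw_unlock_exclusive(lock);

	lck_rw_free(lock, grp);         /* the lock must not be held here */
	lck_grp_free(grp);
}
#endif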
143
144 /*!
145 * @function lck_rw_init
146 *
147 * @abstract
148 * Initializes a rw_lock_t.
149 *
150 * @discussion
151 * Usage statistics for the lock are going to be added to the lock group provided.
152 *
153 * The lock attribute can be used to specify the lock contention behaviour.
154 * RW_WRITER_PRIORITY is the default behaviour (LCK_ATTR_NULL defaults to RW_WRITER_PRIORITY)
155 * and lck_attr_rw_shared_priority() can be used to set the behaviour to RW_SHARED_PRIORITY.
156 *
157 * RW_WRITER_PRIORITY gives priority to the writers upon contention with the readers;
158 * if the lock is held and a writer starts waiting for the lock, readers will not be able
159 * to acquire the lock until all writers stop contending. Readers could
160 * potentially starve.
161 * RW_SHARED_PRIORITY gives priority to the readers upon contention with the writers:
162 * unless the lock is held in exclusive mode, readers will always be able to acquire the lock.
163 * Readers can lock a shared lock even if there are writers waiting. Writers could potentially
164 * starve.
165 *
166 * @param lck lock to initialize.
167 * @param grp lock group to associate with the lock.
168 * @param attr lock attribute to initialize the lock.
169 *
170 */
171 void
172 lck_rw_init(
173 lck_rw_t *lck,
174 lck_grp_t *grp,
175 lck_attr_t *attr)
176 {
177 /* keep this so that the lck_type_t type is referenced for lldb */
178 lck_type_t type = LCK_TYPE_RW;
179
180 if (attr == LCK_ATTR_NULL) {
181 attr = &lck_attr_default;
182 }
183 *lck = (lck_rw_t){
184 .lck_rw_type = type,
185 .lck_rw_can_sleep = true,
186 .lck_rw_priv_excl = !(attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY),
187 };
188 lck_grp_reference(grp, &grp->lck_grp_rwcnt);
189 }
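/*
 * Initialization sketch (illustration only): embedding a lck_rw_t in a data
 * structure and opting into RW_SHARED_PRIORITY with lck_attr_rw_shared_priority().
 * The struct example_obj and example_grp names are hypothetical; the attribute
 * can be freed once the lock has been initialized.
 */
#if 0   /* usage sketch, not compiled */
struct example_obj {
	lck_rw_t eo_lock;
	int      eo_value;              /* protected by eo_lock */
	bool     eo_needs_update;       /* protected by eo_lock */
};

static void
example_obj_init(struct example_obj *obj, lck_grp_t *example_grp)
{
	lck_attr_t *attr = lck_attr_alloc_init();

	lck_attr_rw_shared_priority(attr);      /* readers win on contention */
	lck_rw_init(&obj->eo_lock, example_grp, attr);
	lck_attr_free(attr);
}
#endif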
190
191 /*!
192 * @function lck_rw_free
193 *
194 * @abstract
195 * Frees a rw_lock previously allocated with lck_rw_alloc_init().
196 *
197 * @discussion
198 * The lock must not be held by any thread.
199 *
200 * @param lck rw_lock to free.
201 */
202 void
203 lck_rw_free(
204 lck_rw_t *lck,
205 lck_grp_t *grp)
206 {
207 lck_rw_destroy(lck, grp);
208 zfree(KT_LCK_RW, lck);
209 }
210
211 /*!
212 * @function lck_rw_destroy
213 *
214 * @abstract
215 * Destroys a rw_lock previously initialized with lck_rw_init().
216 *
217 * @discussion
218 * The lock must not be held by any thread.
219 *
220 * @param lck rw_lock to destroy.
221 */
222 void
223 lck_rw_destroy(
224 lck_rw_t *lck,
225 lck_grp_t *grp)
226 {
227 if (lck->lck_rw_type != LCK_TYPE_RW ||
228 lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
229 panic("Destroying previously destroyed lock %p", lck);
230 }
231 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
232
233 lck->lck_rw_type = LCK_TYPE_NONE;
234 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
235 lck_grp_deallocate(grp, &grp->lck_grp_rwcnt);
236 }
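/*
 * Teardown sketch (illustration only), completing the hypothetical
 * example_obj_init() sketch above: the lock must not be held, and destroying
 * it twice panics.
 */
#if 0   /* usage sketch, not compiled */
static void
example_obj_destroy(struct example_obj *obj, lck_grp_t *example_grp)
{
	lck_rw_destroy(&obj->eo_lock, example_grp);
}
#endif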
237
238 #ifdef DEBUG_RW
239
240 /*
241 * Best effort mechanism to debug rw_locks.
242 *
243 * This mechanism is in addition to the owner checks. The owner is set
244 * only when the lock is held in exclusive mode so the checks do not cover
245 * the cases in which the lock is held in shared mode.
246 *
247 * This mechanism tentatively stores the rw_lock acquired and its debug
248 * information on the thread struct.
249 * Debug information can be stored for at most LCK_RW_EXPECTED_MAX_NUMBER rw locks.
250 *
251 * NOTE: LCK_RW_EXPECTED_MAX_NUMBER is the expected number of rw_locks held
252 * at the same time. If a thread holds more than this number of rw_locks we
253 * will start losing debug information.
254 * Increasing LCK_RW_EXPECTED_MAX_NUMBER will increase the probability we will
255 * store the debug information but it will require more memory per thread
256 * and longer lock/unlock time.
257 *
258 * If an empty slot is found for the debug information, we record the lock;
259 * otherwise we set the overflow flag.
260 *
261 * If we reached the overflow threshold we might stop asserting because we cannot be sure
262 * anymore if the lock was acquired or not.
263 *
264 * Even if we reached the overflow threshold, we try to store the debug information
265 * for the new locks acquired. This can be useful in core dumps to debug
266 * possible return to userspace without unlocking and to find possible readers
267 * holding the lock.
268 */
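/*
 * Illustration (not from the original sources) of what this tracking catches:
 * with rw-lock debugging enabled, re-acquiring a lock the thread already holds
 * trips the can-lock assertion instead of silently deadlocking. Reuses the
 * hypothetical struct example_obj from the sketch above.
 */
#if 0   /* usage sketch, not compiled */
static void
example_recursive_misuse(struct example_obj *obj)
{
	lck_rw_lock_exclusive(&obj->eo_lock);
	lck_rw_lock_shared(&obj->eo_lock);      /* assert_canlock_rwlock() panics here */
}
#endif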
269 __startup_func
270 static void
271 rw_lock_init(void)
272 {
273 if (kern_feature_override(KF_RW_LOCK_DEBUG_OVRD)) {
274 LcksOpts |= LCK_OPTION_DISABLE_RW_DEBUG;
275 }
276 }
277 STARTUP(LOCKS, STARTUP_RANK_FIRST, rw_lock_init);
278
279 static inline struct rw_lock_debug_entry *
280 find_lock_in_savedlocks(lck_rw_t* lock, rw_lock_debug_t *rw_locks_held)
281 {
282 int i;
283 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
284 struct rw_lock_debug_entry *existing = &rw_locks_held->rwld_locks[i];
285 if (existing->rwlde_lock == lock) {
286 return existing;
287 }
288 }
289
290 return NULL;
291 }
292
293 __abortlike
294 static void
295 rwlock_slot_panic(rw_lock_debug_t *rw_locks_held)
296 {
297 panic("No empty slot found in %p slot_used %d", rw_locks_held, rw_locks_held->rwld_locks_saved);
298 }
299
300 static inline struct rw_lock_debug_entry *
301 find_empty_slot(rw_lock_debug_t *rw_locks_held)
302 {
303 int i;
304 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
305 struct rw_lock_debug_entry *entry = &rw_locks_held->rwld_locks[i];
306 if (entry->rwlde_lock == NULL) {
307 return entry;
308 }
309 }
310 rwlock_slot_panic(rw_locks_held);
311 }
312
313 __abortlike
314 static void
315 canlock_rwlock_panic(lck_rw_t* lock, thread_t thread, struct rw_lock_debug_entry *entry)
316 {
317 panic("RW lock %p already held by %p caller %p mode_count %d state 0x%x owner 0x%p ",
318 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
319 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
320 }
321
322 __attribute__((noinline))
323 static void
324 assert_canlock_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
325 {
326 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
327 if (__probable(rw_locks_held->rwld_locks_acquired == 0)) {
328 //no locks saved, safe to lock
329 return;
330 }
331
332 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
333 if (__improbable(entry != NULL)) {
334 boolean_t can_be_shared_recursive;
335 if (lck_rw_recursive_shared_assert_74048094) {
336 can_be_shared_recursive = (lock->lck_rw_priv_excl == 0);
337 } else {
338 /* lck_rw_lock_shared is currently called recursively in some places;
339 * until those call sites are fixed, allow recursive
340 * locking in shared mode
341 */
342 can_be_shared_recursive = TRUE;
343 }
344 if ((type == LCK_RW_TYPE_SHARED) && can_be_shared_recursive && entry->rwlde_mode_count >= 1) {
345 return;
346 }
347 canlock_rwlock_panic(lock, thread, entry);
348 }
349 }
350
351 static inline void
352 assert_canlock_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
353 {
354 if (__improbable(!rw_lock_debug_disabled())) {
355 assert_canlock_rwlock_slow(lock, thread, type);
356 }
357 }
358
359 __abortlike
360 static void
361 held_rwlock_notheld_panic(lck_rw_t* lock, thread_t thread)
362 {
363 panic("RW lock %p not held by %p", lock, thread);
364 }
365
366 __abortlike
367 static void
368 held_rwlock_notheld_with_info_panic(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, struct rw_lock_debug_entry *entry)
369 {
370 if (type == LCK_RW_TYPE_EXCLUSIVE) {
371 panic("RW lock %p not held in exclusive by %p caller %p read %d state 0x%x owner 0x%p ",
372 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
373 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
374 } else {
375 panic("RW lock %p not held in shared by %p caller %p read %d state 0x%x owner 0x%p ",
376 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
377 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
378 }
379 }
380
381 __attribute__((noinline))
382 static void
383 assert_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
384 {
385 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
386
387 if (__improbable(rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_locks_saved == 0)) {
388 if (rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_overflow == 0) {
389 held_rwlock_notheld_panic(lock, thread);
390 }
391 return;
392 }
393
394 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
395 if (__probable(entry != NULL)) {
396 if (type == LCK_RW_TYPE_EXCLUSIVE && entry->rwlde_mode_count != -1) {
397 held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
398 } else {
399 if (type == LCK_RW_TYPE_SHARED && entry->rwlde_mode_count <= 0) {
400 held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
401 }
402 }
403 } else {
404 if (rw_locks_held->rwld_overflow == 0) {
405 held_rwlock_notheld_panic(lock, thread);
406 }
407 }
408 }
409
410 static inline void
411 assert_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
412 {
413 if (__improbable(!rw_lock_debug_disabled())) {
414 assert_held_rwlock_slow(lock, thread, type);
415 }
416 }
417
418 __attribute__((noinline))
419 static void
420 change_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
421 {
422 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
423 if (__improbable(rw_locks_held->rwld_locks_saved == 0)) {
424 if (rw_locks_held->rwld_overflow == 0) {
425 held_rwlock_notheld_panic(lock, thread);
426 }
427 return;
428 }
429
430 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
431 if (__probable(entry != NULL)) {
432 if (typeFrom == LCK_RW_TYPE_SHARED) {
433 //We are upgrading
434 assertf(entry->rwlde_mode_count == 1,
435 "RW lock %p not held by a single shared when upgrading "
436 "by %p caller %p read %d state 0x%x owner 0x%p ",
437 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
438 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
439 entry->rwlde_mode_count = -1;
440 set_rwlde_caller_packed(entry, caller);
441 } else {
442 //We are downgrading
443 assertf(entry->rwlde_mode_count == -1,
444 "RW lock %p not held in write mode when downgrading "
445 "by %p caller %p read %d state 0x%x owner 0x%p ",
446 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
447 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
448 entry->rwlde_mode_count = 1;
449 set_rwlde_caller_packed(entry, caller);
450 }
451 return;
452 }
453
454 if (rw_locks_held->rwld_overflow == 0) {
455 held_rwlock_notheld_panic(lock, thread);
456 }
457
458 if (rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER) {
459 //array is full
460 return;
461 }
462
463 struct rw_lock_debug_entry *null_entry = find_empty_slot(rw_locks_held);
464 null_entry->rwlde_lock = lock;
465 set_rwlde_caller_packed(null_entry, caller);
466 if (typeFrom == LCK_RW_TYPE_SHARED) {
467 null_entry->rwlde_mode_count = -1;
468 } else {
469 null_entry->rwlde_mode_count = 1;
470 }
471 rw_locks_held->rwld_locks_saved++;
472 }
473
474 static inline void
475 change_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
476 {
477 if (__improbable(!rw_lock_debug_disabled())) {
478 change_held_rwlock_slow(lock, thread, typeFrom, caller);
479 }
480 }
481
482 __abortlike
483 static void
484 add_held_rwlock_too_many_panic(thread_t thread)
485 {
486 panic("RW lock too many rw locks held, rwld_locks_acquired maxed out for thread %p", thread);
487 }
488
489 static __attribute__((noinline)) void
490 add_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
491 {
492 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
493 struct rw_lock_debug_entry *null_entry;
494 if (__improbable(rw_locks_held->rwld_locks_acquired == UINT32_MAX)) {
495 add_held_rwlock_too_many_panic(thread);
496 }
497 rw_locks_held->rwld_locks_acquired++;
498
499 if (type == LCK_RW_TYPE_EXCLUSIVE) {
500 if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
501 //array is full
502 rw_locks_held->rwld_overflow = 1;
503 return;
504 }
505 null_entry = find_empty_slot(rw_locks_held);
506 null_entry->rwlde_lock = lock;
507 set_rwlde_caller_packed(null_entry, caller);
508 null_entry->rwlde_mode_count = -1;
509 rw_locks_held->rwld_locks_saved++;
510 return;
511 } else {
512 if (__probable(rw_locks_held->rwld_locks_saved == 0)) {
513 //array is empty
514 goto add_shared;
515 }
516
517 boolean_t allow_shared_recursive;
518 if (lck_rw_recursive_shared_assert_74048094) {
519 allow_shared_recursive = (lock->lck_rw_priv_excl == 0);
520 } else {
521 allow_shared_recursive = TRUE;
522 }
523 if (allow_shared_recursive) {
524 //It could be already locked in shared mode
525 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
526 if (entry != NULL) {
527 assert(entry->rwlde_mode_count > 0);
528 assertf(entry->rwlde_mode_count != INT8_MAX,
529 "RW lock %p with too many recursive shared held "
530 "from %p caller %p read %d state 0x%x owner 0x%p",
531 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
532 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
533 entry->rwlde_mode_count += 1;
534 return;
535 }
536 }
537
538 //none of the locks were a match
539 //try to add a new entry
540 if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
541 //array is full
542 rw_locks_held->rwld_overflow = 1;
543 return;
544 }
545
546 add_shared:
547 null_entry = find_empty_slot(rw_locks_held);
548 null_entry->rwlde_lock = lock;
549 set_rwlde_caller_packed(null_entry, caller);
550 null_entry->rwlde_mode_count = 1;
551 rw_locks_held->rwld_locks_saved++;
552 }
553 }
554
555 static inline void
556 add_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
557 {
558 if (__improbable(!rw_lock_debug_disabled())) {
559 add_held_rwlock_slow(lock, thread, type, caller);
560 }
561 }
562
563 static void
564 remove_held_rwlock_slow(lck_rw_t *lock, thread_t thread, lck_rw_type_t type)
565 {
566 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
567 if (__improbable(rw_locks_held->rwld_locks_acquired == 0)) {
568 return;
569 }
570 rw_locks_held->rwld_locks_acquired--;
571
572 if (rw_locks_held->rwld_locks_saved == 0) {
573 assert(rw_locks_held->rwld_overflow == 1);
574 goto out;
575 }
576
577 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
578 if (__probable(entry != NULL)) {
579 if (type == LCK_RW_TYPE_EXCLUSIVE) {
580 assert(entry->rwlde_mode_count == -1);
581 entry->rwlde_mode_count = 0;
582 } else {
583 assert(entry->rwlde_mode_count > 0);
584 entry->rwlde_mode_count--;
585 if (entry->rwlde_mode_count > 0) {
586 goto out;
587 }
588 }
589 entry->rwlde_caller_packed = 0;
590 entry->rwlde_lock = NULL;
591 rw_locks_held->rwld_locks_saved--;
592 } else {
593 assert(rw_locks_held->rwld_overflow == 1);
594 }
595
596 out:
597 if (rw_locks_held->rwld_locks_acquired == 0) {
598 rw_locks_held->rwld_overflow = 0;
599 }
600 return;
601 }
602
603 static inline void
604 remove_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
605 {
606 if (__improbable(!rw_lock_debug_disabled())) {
607 remove_held_rwlock_slow(lock, thread, type);
608 }
609 }
610 #endif /* DEBUG_RW */
611
612 /*
613 * We disable interrupts while holding the RW interlock to prevent an
614 * interrupt from exacerbating hold time.
615 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
616 */
617 static inline boolean_t
618 lck_interlock_lock(
619 lck_rw_t *lck)
620 {
621 boolean_t istate;
622
623 istate = ml_set_interrupts_enabled(FALSE);
624 lck_rw_ilk_lock(lck);
625 return istate;
626 }
627
628 static inline void
629 lck_interlock_unlock(
630 lck_rw_t *lck,
631 boolean_t istate)
632 {
633 lck_rw_ilk_unlock(lck);
634 ml_set_interrupts_enabled(istate);
635 }
636
637 /*
638 * compute the deadline to spin against when
639 * waiting for a change of state on a lck_rw_t
640 */
641 static inline uint64_t
642 lck_rw_deadline_for_spin(
643 lck_rw_t *lck)
644 {
645 lck_rw_word_t word;
646
647 word.data = ordered_load_rw(lck);
648 if (word.can_sleep) {
649 if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
650 /*
651 * there are already threads waiting on this lock... this
652 * implies that they have spun beyond their deadlines waiting for
653 * the desired state to show up so we will not bother spinning at this time...
654 * or
655 * the current number of threads sharing this lock exceeds our capacity to run them
656 * concurrently and since all states we're going to spin for require the rw_shared_count
657 * to be at 0, we'll not bother spinning since the latency for this to happen is
658 * unpredictable...
659 */
660 return mach_absolute_time();
661 }
662 return mach_absolute_time() + os_atomic_load(&MutexSpin, relaxed);
663 } else {
664 return mach_absolute_time() + (100000LL * 1000000000LL);
665 }
666 }
667
668 /*
669 * This inline is used when busy-waiting for an rw lock.
670 * If interrupts were disabled when the lock primitive was called,
671 * we poll the IPI handler for pending tlb flushes in x86.
672 */
673 static inline void
674 lck_rw_lock_pause(
675 boolean_t interrupts_enabled)
676 {
677 #if X86_64
678 if (!interrupts_enabled) {
679 handle_pending_TLB_flushes();
680 }
681 cpu_pause();
682 #else
683 (void) interrupts_enabled;
684 wait_for_event();
685 #endif
686 }
687
688 typedef enum __enum_closed {
689 LCK_RW_DRAIN_S_DRAINED = 0,
690 LCK_RW_DRAIN_S_NOT_DRAINED = 1,
691 LCK_RW_DRAIN_S_EARLY_RETURN = 2,
692 LCK_RW_DRAIN_S_TIMED_OUT = 3,
693 } lck_rw_drain_state_t;
694
695 static lck_rw_drain_state_t
696 lck_rw_drain_status(
697 lck_rw_t *lock,
698 uint32_t status_mask,
699 boolean_t wait,
700 bool (^lock_pause)(void))
701 {
702 uint64_t deadline = 0;
703 uint32_t data;
704 boolean_t istate = FALSE;
705
706 if (wait) {
707 deadline = lck_rw_deadline_for_spin(lock);
708 #if __x86_64__
709 istate = ml_get_interrupts_enabled();
710 #endif
711 }
712
713 for (;;) {
714 #if __x86_64__
715 data = os_atomic_load(&lock->lck_rw_data, relaxed);
716 #else
717 data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
718 #endif
719 if ((data & status_mask) == 0) {
720 atomic_exchange_abort();
721 return LCK_RW_DRAIN_S_DRAINED;
722 }
723
724 if (!wait) {
725 atomic_exchange_abort();
726 return LCK_RW_DRAIN_S_NOT_DRAINED;
727 }
728
729 lck_rw_lock_pause(istate);
730
731 if (mach_absolute_time() >= deadline) {
732 return LCK_RW_DRAIN_S_TIMED_OUT;
733 }
734
735 if (lock_pause && lock_pause()) {
736 return LCK_RW_DRAIN_S_EARLY_RETURN;
737 }
738 }
739 }
740
741 /*
742 * Spin while interlock is held.
743 */
744 static inline void
745 lck_rw_interlock_spin(
746 lck_rw_t *lock)
747 {
748 uint32_t data, prev;
749
750 for (;;) {
751 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_relaxed);
752 if (data & LCK_RW_INTERLOCK) {
753 #if __x86_64__
754 cpu_pause();
755 #else
756 wait_for_event();
757 #endif
758 } else {
759 atomic_exchange_abort();
760 return;
761 }
762 }
763 }
764
765 #define LCK_RW_GRAB_WANT 0
766 #define LCK_RW_GRAB_SHARED 1
767
768 typedef enum __enum_closed __enum_options {
769 LCK_RW_GRAB_F_SHARED = 0x0, // Not really a flag obviously but makes call sites more readable.
770 LCK_RW_GRAB_F_WANT_EXCL = 0x1,
771 LCK_RW_GRAB_F_WAIT = 0x2,
772 } lck_rw_grab_flags_t;
773
774 typedef enum __enum_closed {
775 LCK_RW_GRAB_S_NOT_LOCKED = 0,
776 LCK_RW_GRAB_S_LOCKED = 1,
777 LCK_RW_GRAB_S_EARLY_RETURN = 2,
778 LCK_RW_GRAB_S_TIMED_OUT = 3,
779 } lck_rw_grab_state_t;
780
781 static lck_rw_grab_state_t
782 lck_rw_grab(
783 lck_rw_t *lock,
784 lck_rw_grab_flags_t flags,
785 bool (^lock_pause)(void))
786 {
787 uint64_t deadline = 0;
788 uint32_t data, prev;
789 boolean_t do_exch, istate = FALSE;
790
791 assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
792
793 if ((flags & LCK_RW_GRAB_F_WAIT) != 0) {
794 deadline = lck_rw_deadline_for_spin(lock);
795 #if __x86_64__
796 istate = ml_get_interrupts_enabled();
797 #endif
798 }
799
800 for (;;) {
801 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
802 if (data & LCK_RW_INTERLOCK) {
803 atomic_exchange_abort();
804 lck_rw_interlock_spin(lock);
805 continue;
806 }
807 do_exch = FALSE;
808 if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
809 if ((data & LCK_RW_WANT_EXCL) == 0) {
810 data |= LCK_RW_WANT_EXCL;
811 do_exch = TRUE;
812 }
813 } else { // LCK_RW_GRAB_SHARED
814 if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
815 (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
816 data += LCK_RW_SHARED_READER;
817 do_exch = TRUE;
818 }
819 }
820 if (do_exch) {
821 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
822 return LCK_RW_GRAB_S_LOCKED;
823 }
824 } else {
825 if ((flags & LCK_RW_GRAB_F_WAIT) == 0) {
826 atomic_exchange_abort();
827 return LCK_RW_GRAB_S_NOT_LOCKED;
828 }
829
830 lck_rw_lock_pause(istate);
831
832 if (mach_absolute_time() >= deadline) {
833 return LCK_RW_GRAB_S_TIMED_OUT;
834 }
835 if (lock_pause && lock_pause()) {
836 return LCK_RW_GRAB_S_EARLY_RETURN;
837 }
838 }
839 }
840 }
841
842 /*
843 * The inverse of lck_rw_grab - drops either the LCK_RW_WANT_EXCL bit or
844 * decrements the reader count. Doesn't deal with waking up waiters - i.e.
845 * should only be called when can_sleep is false.
846 */
847 static void
848 lck_rw_drop(lck_rw_t *lock, lck_rw_grab_flags_t flags)
849 {
850 uint32_t data, prev;
851
852 assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
853 assert(!lock->lck_rw_can_sleep);
854
855 for (;;) {
856 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
857
858 /* Interlock should never be taken when can_sleep is false. */
859 assert3u(data & LCK_RW_INTERLOCK, ==, 0);
860
861 if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
862 data &= ~LCK_RW_WANT_EXCL;
863 } else {
864 data -= LCK_RW_SHARED_READER;
865 }
866
867 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
868 break;
869 }
870
871 cpu_pause();
872 }
873
874 return;
875 }
876
877 static boolean_t
878 lck_rw_lock_exclusive_gen(
879 lck_rw_t *lock,
880 bool (^lock_pause)(void))
881 {
882 __assert_only thread_t self = current_thread();
883 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
884 lck_rw_word_t word;
885 int slept = 0;
886 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
887 lck_rw_drain_state_t drain_state = LCK_RW_DRAIN_S_NOT_DRAINED;
888 wait_result_t res = 0;
889 boolean_t istate;
890
891 #if CONFIG_DTRACE
892 boolean_t dtrace_ls_initialized = FALSE;
893 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
894 uint64_t wait_interval = 0;
895 int readers_at_sleep = 0;
896 #endif
897
898 assertf(lock->lck_rw_owner != self->ctid,
899 "Lock already held state=0x%x, owner=%p",
900 ordered_load_rw(lock), self);
901
902 #ifdef DEBUG_RW
903 /*
904 * Best effort attempt to check that this thread
905 * is not already holding the lock (this checks read mode too).
906 */
907 assert_canlock_rwlock(lock, self, LCK_RW_TYPE_EXCLUSIVE);
908 #endif /* DEBUG_RW */
909
910 /*
911 * Try to acquire the lck_rw_want_excl bit.
912 */
913 while (lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL, NULL) != LCK_RW_GRAB_S_LOCKED) {
914 #if CONFIG_DTRACE
915 if (dtrace_ls_initialized == FALSE) {
916 dtrace_ls_initialized = TRUE;
917 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
918 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
919 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
920 if (dtrace_ls_enabled) {
921 /*
922 * Either sleeping or spinning is happening,
923 * start a timing of our delay interval now.
924 */
925 readers_at_sleep = lock->lck_rw_shared_count;
926 wait_interval = mach_absolute_time();
927 }
928 }
929 #endif
930
931 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START,
932 trace_lck, 0, 0, 0, 0);
933
934 grab_state = lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT, lock_pause);
935
936 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END,
937 trace_lck, 0, 0, grab_state, 0);
938
939 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
940 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
941 break;
942 }
943 /*
944 * if we get here, the deadline has expired w/o us
945 * being able to grab the lock exclusively
946 * check to see if we're allowed to do a thread_block
947 */
948 word.data = ordered_load_rw(lock);
949 if (word.can_sleep) {
950 istate = lck_interlock_lock(lock);
951 word.data = ordered_load_rw(lock);
952
953 if (word.want_excl) {
954 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
955
956 word.w_waiting = 1;
957 ordered_store_rw(lock, word.data);
958
959 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
960 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
961 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
962 lck_interlock_unlock(lock, istate);
963 if (res == THREAD_WAITING) {
964 res = thread_block(THREAD_CONTINUE_NULL);
965 slept++;
966 }
967 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
968 } else {
969 word.want_excl = 1;
970 ordered_store_rw(lock, word.data);
971 lck_interlock_unlock(lock, istate);
972 break;
973 }
974 }
975 }
976
977 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
978 assert(lock_pause);
979 return FALSE;
980 }
981
982 /*
983 * Wait for readers (and upgrades) to finish...
984 */
985 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
986 #if CONFIG_DTRACE
987 /*
988 * Either sleeping or spinning is happening, start
989 * a timing of our delay interval now. If we set it
990 * to -1 we don't have accurate data so we cannot later
991 * decide to record a dtrace spin or sleep event.
992 */
993 if (dtrace_ls_initialized == FALSE) {
994 dtrace_ls_initialized = TRUE;
995 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
996 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
997 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
998 if (dtrace_ls_enabled) {
999 /*
1000 * Either sleeping or spinning is happening,
1001 * start a timing of our delay interval now.
1002 */
1003 readers_at_sleep = lock->lck_rw_shared_count;
1004 wait_interval = mach_absolute_time();
1005 }
1006 }
1007 #endif
1008
1009 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1010
1011 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE, lock_pause);
1012
1013 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, drain_state, 0);
1014
1015 if (drain_state == LCK_RW_DRAIN_S_DRAINED ||
1016 drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
1017 break;
1018 }
1019 /*
1020 * if we get here, the deadline has expired w/o us
1021 * being able to grab the lock exclusively
1022 * check to see if we're allowed to do a thread_block
1023 */
1024 word.data = ordered_load_rw(lock);
1025 if (word.can_sleep) {
1026 istate = lck_interlock_lock(lock);
1027 word.data = ordered_load_rw(lock);
1028
1029 if (word.shared_count != 0 || word.want_upgrade) {
1030 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1031
1032 word.w_waiting = 1;
1033 ordered_store_rw(lock, word.data);
1034
1035 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1036 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1037 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1038 lck_interlock_unlock(lock, istate);
1039
1040 if (res == THREAD_WAITING) {
1041 res = thread_block(THREAD_CONTINUE_NULL);
1042 slept++;
1043 }
1044 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1045 } else {
1046 lck_interlock_unlock(lock, istate);
1047 /*
1048 * must own the lock now, since we checked for
1049 * readers or upgrade owner behind the interlock
1050 * no need for a call to 'lck_rw_drain_status'
1051 */
1052 break;
1053 }
1054 }
1055 }
1056
1057 #if CONFIG_DTRACE
1058 /*
1059 * Decide what latencies we suffered that are Dtrace events.
1060 * If we have set wait_interval, then we either spun or slept.
1061 * At least we get out from under the interlock before we record
1062 * which is the best we can do here to minimize the impact
1063 * of the tracing.
1064 * If we have set wait_interval to -1, then dtrace was not enabled when we
1065 * started sleeping/spinning so we don't record this event.
1066 */
1067 if (dtrace_ls_enabled == TRUE) {
1068 if (slept == 0) {
1069 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
1070 mach_absolute_time() - wait_interval, 1);
1071 } else {
1072 /*
1073 * For the blocking case, we also record if when we blocked
1074 * it was held for read or write, and how many readers.
1075 * Notice that above we recorded this before we dropped
1076 * the interlock so the count is accurate.
1077 */
1078 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
1079 mach_absolute_time() - wait_interval, 1,
1080 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1081 }
1082 }
1083 #endif /* CONFIG_DTRACE */
1084
1085 if (drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
1086 lck_rw_drop(lock, LCK_RW_GRAB_F_WANT_EXCL);
1087 assert(lock_pause);
1088 return FALSE;
1089 }
1090
1091 #if CONFIG_DTRACE
1092 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
1093 #endif /* CONFIG_DTRACE */
1094
1095 return TRUE;
1096 }
1097
1098 static inline void
1099 lck_rw_lock_check_preemption(lck_rw_t *lock __unused)
1100 {
1101 assertf((get_preemption_level() == 0 && ml_get_interrupts_enabled()) ||
1102 startup_phase < STARTUP_SUB_EARLY_BOOT ||
1103 current_cpu_datap()->cpu_hibernate ||
1104 ml_is_quiescing() ||
1105 !not_in_kdp,
1106 "%s: attempt to take rwlock %p in non-preemptible or interrupt context: "
1107 "preemption level = %d, interruptible = %d", __func__, lock,
1108 get_preemption_level(), (int)ml_get_interrupts_enabled());
1109 }
1110
1111 #define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
1112 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
1113 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
1114 /*!
1115 * @function lck_rw_lock_exclusive_check_contended
1116 *
1117 * @abstract
1118 * Locks a rw_lock in exclusive mode.
1119 *
1120 * @discussion
1121 * This routine IS EXPERIMENTAL.
1122 * It's only used for the vm object lock, and use for other subsystems is UNSUPPORTED.
1123 * Note that the return value is ONLY A HEURISTIC w.r.t. the lock's contention.
1124 *
1125 * @param lock rw_lock to lock.
1126 *
1127 * @returns Returns TRUE if the thread spun or blocked while attempting to acquire the lock, FALSE
1128 * otherwise.
1129 */
1130 bool
1131 lck_rw_lock_exclusive_check_contended(
1132 lck_rw_t *lock)
1133 {
1134 thread_t thread = current_thread();
1135 bool contended = false;
1136
1137 if (lock->lck_rw_can_sleep) {
1138 lck_rw_lock_check_preemption(lock);
1139 lck_rw_lock_count_inc(thread, lock);
1140 } else if (get_preemption_level() == 0) {
1141 panic("Taking non-sleepable RW lock with preemption enabled");
1142 }
1143
1144 if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
1145 #if CONFIG_DTRACE
1146 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1147 #endif /* CONFIG_DTRACE */
1148 } else {
1149 contended = true;
1150 (void) lck_rw_lock_exclusive_gen(lock, NULL);
1151 }
1152 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1153 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1154 ordered_store_rw_owner(lock, thread->ctid);
1155
1156 #ifdef DEBUG_RW
1157 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, __builtin_return_address(0));
1158 #endif /* DEBUG_RW */
1159 return contended;
1160 }
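/*
 * Calling-pattern sketch (illustration only): the return value is only a
 * contention heuristic and the caller owns the lock exclusively either way.
 * Per the note above this interface is experimental and reserved for the VM
 * object lock; the object/field names below are hypothetical.
 */
#if 0   /* usage sketch, not compiled */
static void
example_check_contended(struct example_obj *obj)
{
	if (lck_rw_lock_exclusive_check_contended(&obj->eo_lock)) {
		/* record a hint, e.g. to bias a future locking decision */
	}
	/* ... exclusive critical section ... */
	lck_rw_unlock_exclusive(&obj->eo_lock);
}
#endif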
1161
1162 __attribute__((always_inline))
1163 static boolean_t
1164 lck_rw_lock_exclusive_internal_inline(
1165 lck_rw_t *lock,
1166 void *caller,
1167 bool (^lock_pause)(void))
1168 {
1169 #pragma unused(caller)
1170 thread_t thread = current_thread();
1171
1172 if (lock->lck_rw_can_sleep) {
1173 lck_rw_lock_check_preemption(lock);
1174 lck_rw_lock_count_inc(thread, lock);
1175 } else if (get_preemption_level() == 0) {
1176 panic("Taking non-sleepable RW lock with preemption enabled");
1177 }
1178
1179 if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
1180 #if CONFIG_DTRACE
1181 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1182 #endif /* CONFIG_DTRACE */
1183 } else if (!lck_rw_lock_exclusive_gen(lock, lock_pause)) {
1184 /*
1185 * lck_rw_lock_exclusive_gen() should only return
1186 * early if lock_pause has been passed and
1187 * returns FALSE. lock_pause is exclusive with
1188 * lck_rw_can_sleep().
1189 */
1190 assert(!lock->lck_rw_can_sleep);
1191 return FALSE;
1192 }
1193
1194 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1195 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1196 ordered_store_rw_owner(lock, thread->ctid);
1197
1198 #if DEBUG_RW
1199 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1200 #endif /* DEBUG_RW */
1201
1202 return TRUE;
1203 }
1204
1205 __attribute__((noinline))
1206 static void
1207 lck_rw_lock_exclusive_internal(
1208 lck_rw_t *lock,
1209 void *caller)
1210 {
1211 (void) lck_rw_lock_exclusive_internal_inline(lock, caller, NULL);
1212 }
1213
1214 /*!
1215 * @function lck_rw_lock_exclusive
1216 *
1217 * @abstract
1218 * Locks a rw_lock in exclusive mode.
1219 *
1220 * @discussion
1221 * This function can block.
1222 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1223 * can acquire it in exclusive mode.
1224 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1225 *
1226 * @param lock rw_lock to lock.
1227 */
1228 void
1229 lck_rw_lock_exclusive(
1230 lck_rw_t *lock)
1231 {
1232 (void) lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), NULL);
1233 }
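/*
 * Usage sketch (illustration only): an exclusive critical section around a
 * write to protected state. Recursive acquisition is not supported and the
 * thread must not return to userspace while holding the lock. Names are
 * hypothetical.
 */
#if 0   /* usage sketch, not compiled */
static void
example_exclusive_update(struct example_obj *obj, int value)
{
	lck_rw_lock_exclusive(&obj->eo_lock);
	obj->eo_value = value;                  /* the writer has sole access here */
	lck_rw_unlock_exclusive(&obj->eo_lock);
}
#endif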
1234
1235 /*!
1236 * @function lck_rw_lock_exclusive_b
1237 *
1238 * @abstract
1239 * Locks a rw_lock in exclusive mode. Returns early if the lock can't be acquired
1240 * and the specified block returns true.
1241 *
1242 * @discussion
1243 * Identical to lck_rw_lock_exclusive() but can return early if the lock can't be
1244 * acquired and the specified block returns true. The block is called
1245 * repeatedly when waiting to acquire the lock.
1246 * Should only be called when the lock cannot sleep (i.e. when
1247 * lock->lck_rw_can_sleep is false).
1248 *
1249 * @param lock rw_lock to lock.
1250 * @param lock_pause block invoked while waiting to acquire lock
1251 *
1252 * @returns Returns TRUE if the lock is successfully taken,
1253 * FALSE if the block returns true and the lock has
1254 * not been acquired.
1255 */
1256 boolean_t
1257 lck_rw_lock_exclusive_b(
1258 lck_rw_t *lock,
1259 bool (^lock_pause)(void))
1260 {
1261 assert(!lock->lck_rw_can_sleep);
1262
1263 return lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), lock_pause);
1264 }
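/*
 * Usage sketch (illustration only): lck_rw_lock_exclusive_b() on a lock that
 * was set up as non-sleepable, giving up once an externally observed flag is
 * set. The abort_requested flag is a hypothetical example.
 */
#if 0   /* usage sketch, not compiled */
static bool abort_requested;

static bool
example_try_exclusive(lck_rw_t *lock)
{
	return lck_rw_lock_exclusive_b(lock, ^bool (void) {
		return os_atomic_load(&abort_requested, relaxed);
	});
}
#endif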
1265
1266 /*
1267 * Routine: lck_rw_lock_shared_gen
1268 * Function:
1269 * Fast path code has determined that this lock
1270 * is held exclusively... this is where we spin/block
1271 * until we can acquire the lock in the shared mode
1272 */
1273 static boolean_t
1274 lck_rw_lock_shared_gen(
1275 lck_rw_t *lck,
1276 bool (^lock_pause)(void))
1277 {
1278 __assert_only thread_t self = current_thread();
1279 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1280 lck_rw_word_t word;
1281 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
1282 int slept = 0;
1283 wait_result_t res = 0;
1284 boolean_t istate;
1285
1286 #if CONFIG_DTRACE
1287 uint64_t wait_interval = 0;
1288 int readers_at_sleep = 0;
1289 boolean_t dtrace_ls_initialized = FALSE;
1290 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1291 #endif /* CONFIG_DTRACE */
1292
1293 assertf(lck->lck_rw_owner != self->ctid,
1294 "Lock already held state=0x%x, owner=%p",
1295 ordered_load_rw(lck), self);
1296
1297 #ifdef DEBUG_RW
1298 /*
1299 * Best effort attempt to check that this thread
1300 * is not already holding the lock in shared mode.
1301 */
1302 assert_canlock_rwlock(lck, self, LCK_RW_TYPE_SHARED);
1303 #endif
1304
1305 while (lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED, NULL) != LCK_RW_GRAB_S_LOCKED) {
1306 #if CONFIG_DTRACE
1307 if (dtrace_ls_initialized == FALSE) {
1308 dtrace_ls_initialized = TRUE;
1309 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1310 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1311 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1312 if (dtrace_ls_enabled) {
1313 /*
1314 * Either sleeping or spinning is happening,
1315 * start a timing of our delay interval now.
1316 */
1317 readers_at_sleep = lck->lck_rw_shared_count;
1318 wait_interval = mach_absolute_time();
1319 }
1320 }
1321 #endif
1322
1323 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1324 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
1325
1326 grab_state = lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED | LCK_RW_GRAB_F_WAIT, lock_pause);
1327
1328 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1329 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, grab_state, 0);
1330
1331 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
1332 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
1333 break;
1334 }
1335
1336 /*
1337 * if we get here, the deadline has expired w/o us
1338 * being able to grab the lock for read
1339 * check to see if we're allowed to do a thread_block
1340 */
1341 if (lck->lck_rw_can_sleep) {
1342 istate = lck_interlock_lock(lck);
1343
1344 word.data = ordered_load_rw(lck);
1345 if ((word.want_excl || word.want_upgrade) &&
1346 ((word.shared_count == 0) || word.priv_excl)) {
1347 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1348 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
1349
1350 word.r_waiting = 1;
1351 ordered_store_rw(lck, word.data);
1352
1353 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
1354 res = assert_wait(LCK_RW_READER_EVENT(lck),
1355 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1356 lck_interlock_unlock(lck, istate);
1357
1358 if (res == THREAD_WAITING) {
1359 res = thread_block(THREAD_CONTINUE_NULL);
1360 slept++;
1361 }
1362 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1363 trace_lck, res, slept, 0, 0);
1364 } else {
1365 word.shared_count++;
1366 ordered_store_rw(lck, word.data);
1367 lck_interlock_unlock(lck, istate);
1368 break;
1369 }
1370 }
1371 }
1372
1373 #if CONFIG_DTRACE
1374 if (dtrace_ls_enabled == TRUE) {
1375 if (slept == 0) {
1376 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1377 } else {
1378 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1379 mach_absolute_time() - wait_interval, 0,
1380 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1381 }
1382 }
1383 #endif /* CONFIG_DTRACE */
1384
1385 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
1386 assert(lock_pause);
1387 return FALSE;
1388 }
1389
1390 #if CONFIG_DTRACE
1391 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1392 #endif /* CONFIG_DTRACE */
1393
1394 return TRUE;
1395 }
1396
1397 __attribute__((always_inline))
1398 static boolean_t
1399 lck_rw_lock_shared_internal_inline(
1400 lck_rw_t *lock,
1401 void *caller,
1402 bool (^lock_pause)(void))
1403 {
1404 #pragma unused(caller)
1405
1406 uint32_t data, prev;
1407 thread_t thread = current_thread();
1408 #ifdef DEBUG_RW
1409 boolean_t check_canlock = TRUE;
1410 #endif
1411
1412 if (lock->lck_rw_can_sleep) {
1413 lck_rw_lock_check_preemption(lock);
1414 lck_rw_lock_count_inc(thread, lock);
1415 } else if (get_preemption_level() == 0) {
1416 panic("Taking non-sleepable RW lock with preemption enabled");
1417 }
1418
1419 for (;;) {
1420 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1421 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1422 atomic_exchange_abort();
1423 if (!lck_rw_lock_shared_gen(lock, lock_pause)) {
1424 /*
1425 * lck_rw_lock_shared_gen() should only return
1426 * early if lock_pause has been passed and
1427 * returns FALSE. lock_pause is exclusive with
1428 * lck_rw_can_sleep().
1429 */
1430 assert(!lock->lck_rw_can_sleep);
1431 return FALSE;
1432 }
1433
1434 goto locked;
1435 }
1436 #ifdef DEBUG_RW
1437 if ((data & LCK_RW_SHARED_MASK) == 0) {
1438 /*
1439 * If the lock is uncontended,
1440 * we do not need to check if we can lock it
1441 */
1442 check_canlock = FALSE;
1443 }
1444 #endif
1445 data += LCK_RW_SHARED_READER;
1446 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1447 break;
1448 }
1449 cpu_pause();
1450 }
1451 #ifdef DEBUG_RW
1452 if (check_canlock) {
1453 /*
1454 * Best effort attempt to check that this thread
1455 * is not already holding the lock (this checks read mode too).
1456 */
1457 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1458 }
1459 #endif
1460 locked:
1461 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1462 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1463
1464 #if CONFIG_DTRACE
1465 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1466 #endif /* CONFIG_DTRACE */
1467
1468 #ifdef DEBUG_RW
1469 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1470 #endif /* DEBUG_RW */
1471
1472 return TRUE;
1473 }
1474
1475 __attribute__((noinline))
1476 static void
1477 lck_rw_lock_shared_internal(
1478 lck_rw_t *lock,
1479 void *caller)
1480 {
1481 (void) lck_rw_lock_shared_internal_inline(lock, caller, NULL);
1482 }
1483
1484 /*!
1485 * @function lck_rw_lock_shared
1486 *
1487 * @abstract
1488 * Locks a rw_lock in shared mode.
1489 *
1490 * @discussion
1491 * This function can block.
1492 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1493 * can acquire it in exclusive mode.
1494 * If the lock is held in shared mode and there are no writers waiting, a reader will be able to acquire
1495 * the lock without waiting.
1496 * If the lock is held in shared mode and there is at least a writer waiting, a reader will wait
1497 * for all the writers to make progress if the lock was initialized with the default settings. Instead if
1498 * RW_SHARED_PRIORITY was selected at initialization time, a reader will never wait if the lock is held
1499 * in shared mode.
1500 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1501 *
1502 * @param lock rw_lock to lock.
1503 */
1504 void
1505 lck_rw_lock_shared(
1506 lck_rw_t *lock)
1507 {
1508 (void) lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), NULL);
1509 }
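/*
 * Usage sketch (illustration only): a shared critical section for a
 * read-mostly lookup; multiple readers may run it concurrently. Names are
 * hypothetical.
 */
#if 0   /* usage sketch, not compiled */
static int
example_shared_read(struct example_obj *obj)
{
	int value;

	lck_rw_lock_shared(&obj->eo_lock);
	value = obj->eo_value;                  /* other readers may run concurrently */
	lck_rw_unlock_shared(&obj->eo_lock);

	return value;
}
#endif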
1510
1511 /*!
1512 * @function lck_rw_lock_shared_b
1513 *
1514 * @abstract
1515 * Locks a rw_lock in shared mode. Returns early if the lock can't be acquired
1516 * and the specified block returns true.
1517 *
1518 * @discussion
1519 * Identical to lck_rw_lock_shared() but can return early if the lock can't be
1520 * acquired and the specified block returns true. The block is called
1521 * repeatedly when waiting to acquire the lock.
1522 * Should only be called when the lock cannot sleep (i.e. when
1523 * lock->lck_rw_can_sleep is false).
1524 *
1525 * @param lock rw_lock to lock.
1526 * @param lock_pause block invoked while waiting to acquire lock
1527 *
1528 * @returns Returns TRUE if the lock is successfully taken,
1529 * FALSE if the block returns true and the lock has
1530 * not been acquired.
1531 */
1532 boolean_t
1533 lck_rw_lock_shared_b(
1534 lck_rw_t *lock,
1535 bool (^lock_pause)(void))
1536 {
1537 assert(!lock->lck_rw_can_sleep);
1538
1539 return lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), lock_pause);
1540 }
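/*
 * Usage sketch (illustration only): like the exclusive variant above, this
 * returns FALSE without the lock if the hypothetical abort_requested flag
 * becomes true while waiting.
 */
#if 0   /* usage sketch, not compiled */
static bool
example_try_shared(lck_rw_t *lock)
{
	return lck_rw_lock_shared_b(lock, ^bool (void) {
		return os_atomic_load(&abort_requested, relaxed);
	});
}
#endif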
1541
1542 /*
1543 * Routine: lck_rw_lock_shared_to_exclusive_failure
1544 * Function:
1545 * Fast path code has already dropped our read
1546 * count and determined that someone else owns 'lck_rw_want_upgrade';
1547 * if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting'.
1548 * All we need to do here is determine if a wakeup is needed.
1549 */
1550 static boolean_t
1551 lck_rw_lock_shared_to_exclusive_failure(
1552 lck_rw_t *lck,
1553 uint32_t prior_lock_state)
1554 {
1555 thread_t thread = current_thread();
1556
1557 if ((prior_lock_state & LCK_RW_W_WAITING) &&
1558 ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
1559 /*
1560 * Someone else has requested upgrade.
1561 * Since we've released the read lock, wake
1562 * him up if he's blocked waiting
1563 */
1564 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1565 }
1566
1567 /* Check if dropping the lock means that we need to unpromote */
1568 if (lck->lck_rw_can_sleep) {
1569 lck_rw_lock_count_dec(thread, lck);
1570 }
1571
1572 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1573 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1574
1575 #ifdef DEBUG_RW
1576 remove_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
1577 #endif /* DEBUG_RW */
1578
1579 return FALSE;
1580 }
1581
1582 /*
1583 * Routine: lck_rw_lock_shared_to_exclusive_success
1584 * Function:
1585 * the fast path code has already dropped our read
1586 * count and successfully acquired 'lck_rw_want_upgrade'
1587 * we just need to wait for the rest of the readers to drain
1588 * and then we can return as the exclusive holder of this lock
1589 */
1590 static void
1591 lck_rw_lock_shared_to_exclusive_success(
1592 lck_rw_t *lock)
1593 {
1594 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1595 int slept = 0;
1596 lck_rw_word_t word;
1597 wait_result_t res;
1598 boolean_t istate;
1599 lck_rw_drain_state_t drain_state;
1600
1601 #if CONFIG_DTRACE
1602 uint64_t wait_interval = 0;
1603 int readers_at_sleep = 0;
1604 boolean_t dtrace_ls_initialized = FALSE;
1605 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1606 #endif
1607
1608 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
1609 word.data = ordered_load_rw(lock);
1610 #if CONFIG_DTRACE
1611 if (dtrace_ls_initialized == FALSE) {
1612 dtrace_ls_initialized = TRUE;
1613 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1614 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1615 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1616 if (dtrace_ls_enabled) {
1617 /*
1618 * Either sleeping or spinning is happening,
1619 * start a timing of our delay interval now.
1620 */
1621 readers_at_sleep = word.shared_count;
1622 wait_interval = mach_absolute_time();
1623 }
1624 }
1625 #endif
1626
1627 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1628 trace_lck, word.shared_count, 0, 0, 0);
1629
1630 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE, NULL);
1631
1632 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1633 trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
1634
1635 if (drain_state == LCK_RW_DRAIN_S_DRAINED) {
1636 break;
1637 }
1638
1639 /*
1640 * if we get here, the spin deadline in lck_rw_drain_status()
1641 * has expired w/o the rw_shared_count having drained to 0
1642 * check to see if we're allowed to do a thread_block
1643 */
1644 if (word.can_sleep) {
1645 istate = lck_interlock_lock(lock);
1646
1647 word.data = ordered_load_rw(lock);
1648 if (word.shared_count != 0) {
1649 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1650 trace_lck, word.shared_count, 0, 0, 0);
1651
1652 word.w_waiting = 1;
1653 ordered_store_rw(lock, word.data);
1654
1655 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
1656 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1657 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1658 lck_interlock_unlock(lock, istate);
1659
1660 if (res == THREAD_WAITING) {
1661 res = thread_block(THREAD_CONTINUE_NULL);
1662 slept++;
1663 }
1664 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1665 trace_lck, res, slept, 0, 0);
1666 } else {
1667 lck_interlock_unlock(lock, istate);
1668 break;
1669 }
1670 }
1671 }
1672 #if CONFIG_DTRACE
1673 /*
1674 * Whether we blocked (rather than just spun) above is recorded in 'slept'; fire the matching probe.
1675 */
1676 if (dtrace_ls_enabled == TRUE) {
1677 if (slept == 0) {
1678 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
1679 } else {
1680 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
1681 mach_absolute_time() - wait_interval, 1,
1682 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1683 }
1684 }
1685 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
1686 #endif
1687 }
1688
1689 /*!
1690 * @function lck_rw_lock_shared_to_exclusive
1691 *
1692 * @abstract
1693 * Upgrades a rw_lock held in shared mode to exclusive.
1694 *
1695 * @discussion
1696 * This function can block.
1697 * Only one reader at a time can upgrade to exclusive mode. If the upgrade fails, the function will
1698 * return with the lock not held.
1699 * The caller needs to hold the lock in shared mode to upgrade it.
1700 *
1701 * @param lock rw_lock already held in shared mode to upgrade.
1702 *
1703 * @returns TRUE if the lock was upgraded, FALSE if it was not possible.
1704 * If the function was not able to upgrade the lock, the lock will be dropped
1705 * by the function.
1706 */
1707 boolean_t
1708 lck_rw_lock_shared_to_exclusive(
1709 lck_rw_t *lock)
1710 {
1711 thread_t thread = current_thread();
1712 uint32_t data, prev;
1713
1714 assertf(lock->lck_rw_priv_excl != 0, "lock %p thread %p", lock, current_thread());
1715
1716 #if DEBUG_RW
1717 assert_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1718 #endif /* DEBUG_RW */
1719
1720 for (;;) {
1721 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1722 if (data & LCK_RW_INTERLOCK) {
1723 atomic_exchange_abort();
1724 lck_rw_interlock_spin(lock);
1725 continue;
1726 }
1727 if (data & LCK_RW_WANT_UPGRADE) {
1728 data -= LCK_RW_SHARED_READER;
1729 if ((data & LCK_RW_SHARED_MASK) == 0) { /* we were the last reader */
1730 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1731 }
1732 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1733 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1734 }
1735 } else {
1736 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1737 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1738 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1739 break;
1740 }
1741 }
1742 cpu_pause();
1743 }
1744 /* we now own the WANT_UPGRADE */
1745 if (data & LCK_RW_SHARED_MASK) { /* check to see if all of the readers are drained */
1746 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1747 }
1748
1749 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1750 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1751
1752 ordered_store_rw_owner(lock, thread->ctid);
1753 #if CONFIG_DTRACE
1754 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1755 #endif /* CONFIG_DTRACE */
1756
1757 #if DEBUG_RW
1758 change_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, __builtin_return_address(0));
1759 #endif /* DEBUG_RW */
1760 return TRUE;
1761 }
1762
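/*
 * Illustrative usage sketch of the upgrade path (hypothetical caller, not
 * part of the lock implementation): because a failed upgrade drops the lock
 * entirely, the caller re-acquires it exclusive and must re-validate any
 * state it observed while holding it shared.  The example_* names are made
 * up for illustration.
 */
#if 0
struct example_counter {
	lck_rw_t        exc_lock;       /* assumed already initialized */
	uint64_t        exc_value;
};

static void
example_counter_bump(struct example_counter *ec)
{
	lck_rw_lock_shared(&ec->exc_lock);

	/* ... read-mostly work under the shared hold ... */

	if (!lck_rw_lock_shared_to_exclusive(&ec->exc_lock)) {
		/*
		 * Upgrade failed: the lock is no longer held at all.
		 * Take it exclusive from scratch; anything observed above
		 * may be stale and has to be re-checked here.
		 */
		lck_rw_lock_exclusive(&ec->exc_lock);
	}

	/* held exclusive either way at this point */
	ec->exc_value++;
	lck_rw_unlock_exclusive(&ec->exc_lock);
}
#endif
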
1763 /*
1764 * Routine: lck_rw_lock_exclusive_to_shared_gen
1765 * Function:
1766 * Fast path has already dropped
1767 * our exclusive state and bumped lck_rw_shared_count
1768 * all we need to do here is determine if anyone
1769 * needs to be awakened.
1770 */
1771 static void
1772 lck_rw_lock_exclusive_to_shared_gen(
1773 lck_rw_t *lck,
1774 uint32_t prior_lock_state,
1775 void *caller)
1776 {
1777 #pragma unused(caller)
1778 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1779 lck_rw_word_t fake_lck;
1780
1781 /*
1782 * prior_lock_state is a snapshot of the 1st word of the
1783 * lock in question... we fake up a lck_rw_word_t from it
1784 * and carefully avoid accessing anything beyond what's defined
1785 * in the first word of a lck_rw_t
1786 */
1787 fake_lck.data = prior_lock_state;
1788
1789 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1790 trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);
1791
1792 /*
1793 * don't wake up anyone waiting to take the lock exclusively
1794 * since we hold a read count... when the read count drops to 0,
1795 * the writers will be woken.
1796 *
1797 * wake up any waiting readers if we don't have any writers waiting,
1798 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1799 */
1800 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
1801 thread_wakeup(LCK_RW_READER_EVENT(lck));
1802 }
1803
1804 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1805 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1806
1807 #if CONFIG_DTRACE
1808 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1809 #endif
1810
1811 #if DEBUG_RW
1812 thread_t thread = current_thread();
1813 change_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1814 #endif /* DEBUG_RW */
1815 }
1816
1817 /*!
1818 * @function lck_rw_lock_exclusive_to_shared
1819 *
1820 * @abstract
1821 * Downgrades a rw_lock held in exclusive mode to shared.
1822 *
1823 * @discussion
1824 * The caller needs to hold the lock in exclusive mode to be able to downgrade it.
1825 *
1826 * @param lock rw_lock already held in exclusive mode to downgrade.
1827 */
1828 void
1829 lck_rw_lock_exclusive_to_shared(
1830 lck_rw_t *lock)
1831 {
1832 uint32_t data, prev;
1833
1834 assertf(lock->lck_rw_owner == current_thread()->ctid,
1835 "state=0x%x, owner=%p", lock->lck_rw_data,
1836 ctid_get_thread_unsafe(lock->lck_rw_owner));
1837 ordered_store_rw_owner(lock, 0);
1838
1839 for (;;) {
1840 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1841 if (data & LCK_RW_INTERLOCK) {
1842 atomic_exchange_abort();
1843 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
1844 continue;
1845 }
1846 data += LCK_RW_SHARED_READER;
1847 if (data & LCK_RW_WANT_UPGRADE) {
1848 data &= ~(LCK_RW_WANT_UPGRADE);
1849 } else {
1850 data &= ~(LCK_RW_WANT_EXCL);
1851 }
1852 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
1853 data &= ~(LCK_RW_W_WAITING);
1854 }
1855 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
1856 break;
1857 }
1858 cpu_pause();
1859 }
1860 lck_rw_lock_exclusive_to_shared_gen(lock, prev, __builtin_return_address(0));
1861 }
1862
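/*
 * Illustrative sketch of the downgrade path (hypothetical caller, not part
 * of the lock implementation): a writer publishes an update and then
 * downgrades so that it can keep reading without excluding other readers.
 * The names are made up for illustration.
 */
#if 0
static void
example_publish_then_read(lck_rw_t *lock, uint32_t *shared_value)
{
	lck_rw_lock_exclusive(lock);
	*shared_value += 1;                     /* exclusive: safe to write */

	lck_rw_lock_exclusive_to_shared(lock);  /* downgrade; the lock is never dropped */

	(void)*shared_value;                    /* shared: read-only from here on */
	lck_rw_unlock_shared(lock);
}
#endif
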
1863 /*
1864 * Very sad hack, but the codegen for lck_rw_lock
1865 * is very unhappy with the combination of __builtin_return_address()
1866 * and a noreturn function. For some reason it adds more frames
1867 * than it should. rdar://76570684
1868 */
1869 void
1870 _lck_rw_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
1871 #pragma clang diagnostic push
1872 #pragma clang diagnostic ignored "-Wmissing-noreturn"
1873 __attribute__((noinline, weak))
1874 void
1875 _lck_rw_lock_type_panic(
1876 lck_rw_t *lck,
1877 lck_rw_type_t lck_rw_type)
1878 {
1879 panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
1880 }
1881 #pragma clang diagnostic pop
1882
1883 /*!
1884 * @function lck_rw_lock
1885 *
1886 * @abstract
1887 * Locks a rw_lock with the specified type.
1888 *
1889 * @discussion
1890 * See lck_rw_lock_shared() or lck_rw_lock_exclusive() for more details.
1891 *
1892 * @param lck rw_lock to lock.
1893 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
1894 */
1895 void
1896 lck_rw_lock(
1897 lck_rw_t *lck,
1898 lck_rw_type_t lck_rw_type)
1899 {
1900 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1901 return lck_rw_lock_shared_internal(lck, __builtin_return_address(0));
1902 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1903 return lck_rw_lock_exclusive_internal(lck, __builtin_return_address(0));
1904 }
1905 _lck_rw_lock_type_panic(lck, lck_rw_type);
1906 }
1907
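/*
 * Illustrative sketch (hypothetical caller, not part of the lock
 * implementation): when the locking mode is only known at runtime, the
 * lck_rw_type_t can be carried around and passed to both lck_rw_lock()
 * and lck_rw_unlock().  The name is made up for illustration.
 */
#if 0
static void
example_visit(lck_rw_t *lock, bool will_modify)
{
	lck_rw_type_t type = will_modify ? LCK_RW_TYPE_EXCLUSIVE : LCK_RW_TYPE_SHARED;

	lck_rw_lock(lock, type);
	/* ... visit the protected structure ... */
	lck_rw_unlock(lock, type);
}
#endif
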
1908 __attribute__((always_inline))
1909 static boolean_t
1910 lck_rw_try_lock_shared_internal_inline(
1911 lck_rw_t *lock,
1912 void *caller)
1913 {
1914 #pragma unused(caller)
1915
1916 uint32_t data, prev;
1917 thread_t thread = current_thread();
1918 #ifdef DEBUG_RW
1919 boolean_t check_canlock = TRUE;
1920 #endif
1921
1922 for (;;) {
1923 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1924 if (data & LCK_RW_INTERLOCK) {
1925 atomic_exchange_abort();
1926 lck_rw_interlock_spin(lock);
1927 continue;
1928 }
1929 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1930 atomic_exchange_abort();
1931 return FALSE; /* lock is busy */
1932 }
1933 #ifdef DEBUG_RW
1934 if ((data & LCK_RW_SHARED_MASK) == 0) {
1935 /*
1936 * If the lock is uncontended,
1937 * we do not need to check if we can lock it
1938 */
1939 check_canlock = FALSE;
1940 }
1941 #endif
1942 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1943 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1944 break;
1945 }
1946 cpu_pause();
1947 }
1948 #ifdef DEBUG_RW
1949 if (check_canlock) {
1950 /*
1951 * Best effort attempt to check that this thread
1952 * is not already holding the lock (this checks read mode too).
1953 */
1954 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1955 }
1956 #endif
1957 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1958 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1959
1960 if (lock->lck_rw_can_sleep) {
1961 lck_rw_lock_count_inc(thread, lock);
1962 } else if (get_preemption_level() == 0) {
1963 panic("Taking non-sleepable RW lock with preemption enabled");
1964 }
1965
1966 #if CONFIG_DTRACE
1967 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1968 #endif /* CONFIG_DTRACE */
1969
1970 #ifdef DEBUG_RW
1971 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1972 #endif /* DEBUG_RW */
1973 return TRUE;
1974 }
1975
1976 __attribute__((noinline))
1977 static boolean_t
1978 lck_rw_try_lock_shared_internal(
1979 lck_rw_t *lock,
1980 void *caller)
1981 {
1982 return lck_rw_try_lock_shared_internal_inline(lock, caller);
1983 }
1984
1985 /*!
1986 * @function lck_rw_try_lock_shared
1987 *
1988 * @abstract
1989 * Tries to lock a rw_lock in read mode.
1990 *
1991 * @discussion
1992 * This function will return and not block in case the lock is already held.
1993 * See lck_rw_lock_shared for more details.
1994 *
1995 * @param lock rw_lock to lock.
1996 *
1997 * @returns TRUE if the lock is successfully acquired, FALSE if a writer holds or wants the lock.
1998 */
1999 boolean_t
2000 lck_rw_try_lock_shared(
2001 lck_rw_t *lock)
2002 {
2003 return lck_rw_try_lock_shared_internal_inline(lock, __builtin_return_address(0));
2004 }
2005
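/*
 * Illustrative sketch (hypothetical caller, not part of the lock
 * implementation): opportunistic read access from a path that must not
 * block, falling back to the caller's slow path when a writer holds or
 * wants the lock.  The names are made up for illustration.
 */
#if 0
static bool
example_try_read(lck_rw_t *lock, const uint64_t *value, uint64_t *out)
{
	if (!lck_rw_try_lock_shared(lock)) {
		return false;           /* busy: caller retries or defers */
	}
	*out = *value;
	lck_rw_unlock_shared(lock);
	return true;
}
#endif
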
2006 __attribute__((always_inline))
2007 static boolean_t
2008 lck_rw_try_lock_exclusive_internal_inline(
2009 lck_rw_t *lock,
2010 void *caller)
2011 {
2012 #pragma unused(caller)
2013 uint32_t data, prev;
2014
2015 for (;;) {
2016 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
2017 if (data & LCK_RW_INTERLOCK) {
2018 atomic_exchange_abort();
2019 lck_rw_interlock_spin(lock);
2020 continue;
2021 }
2022 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
2023 atomic_exchange_abort();
2024 return FALSE;
2025 }
2026 data |= LCK_RW_WANT_EXCL;
2027 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
2028 break;
2029 }
2030 cpu_pause();
2031 }
2032 thread_t thread = current_thread();
2033
2034 if (lock->lck_rw_can_sleep) {
2035 lck_rw_lock_count_inc(thread, lock);
2036 } else if (get_preemption_level() == 0) {
2037 panic("Taking non-sleepable RW lock with preemption enabled");
2038 }
2039
2040 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
2041 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
2042
2043 ordered_store_rw_owner(lock, thread->ctid);
2044 #if CONFIG_DTRACE
2045 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
2046 #endif /* CONFIG_DTRACE */
2047
2048 #ifdef DEBUG_RW
2049 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
2050 #endif /* DEBUG_RW */
2051 return TRUE;
2052 }
2053
2054 __attribute__((noinline))
2055 static boolean_t
2056 lck_rw_try_lock_exclusive_internal(
2057 lck_rw_t *lock,
2058 void *caller)
2059 {
2060 return lck_rw_try_lock_exclusive_internal_inline(lock, caller);
2061 }
2062
2063 /*!
2064 * @function lck_rw_try_lock_exclusive
2065 *
2066 * @abstract
2067 * Tries to lock a rw_lock in write mode.
2068 *
2069 * @discussion
2070 * This function will return and not block in case the lock is already held.
2071 * See lck_rw_lock_exclusive for more details.
2072 *
2073 * @param lock rw_lock to lock.
2074 *
2075 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
2076 */
2077 boolean_t
2078 lck_rw_try_lock_exclusive(
2079 lck_rw_t *lock)
2080 {
2081 return lck_rw_try_lock_exclusive_internal_inline(lock, __builtin_return_address(0));
2082 }
2083
2084 /*
2085 * Very sad hack, but the codegen for lck_rw_try_lock
2086 * is very unhappy with the combination of __builtin_return_address()
2087 * and a noreturn function. For some reason it adds more frames
2088 * than it should. rdar://76570684
2089 */
2090 boolean_t
2091 _lck_rw_try_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
2092 #pragma clang diagnostic push
2093 #pragma clang diagnostic ignored "-Wmissing-noreturn"
2094 __attribute__((noinline, weak))
2095 boolean_t
2096 _lck_rw_try_lock_type_panic(
2097 lck_rw_t *lck,
2098 lck_rw_type_t lck_rw_type)
2099 {
2100 panic("lck_rw_try_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
2101 }
2102 #pragma clang diagnostic pop
2103
2104 /*!
2105 * @function lck_rw_try_lock
2106 *
2107 * @abstract
2108 * Tries to lock a rw_lock with the specified type.
2109 *
2110 * @discussion
2111 * This function will return and not wait/block in case the lock is already held.
2112 * See lck_rw_try_lock_shared() or lck_rw_try_lock_exclusive() for more details.
2113 *
2114 * @param lck rw_lock to lock.
2115 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2116 *
2117 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
2118 */
2119 boolean_t
2120 lck_rw_try_lock(
2121 lck_rw_t *lck,
2122 lck_rw_type_t lck_rw_type)
2123 {
2124 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2125 return lck_rw_try_lock_shared_internal(lck, __builtin_return_address(0));
2126 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2127 return lck_rw_try_lock_exclusive_internal(lck, __builtin_return_address(0));
2128 }
2129 return _lck_rw_try_lock_type_panic(lck, lck_rw_type);
2130 }
2131
2132 /*
2133 * Routine: lck_rw_done_gen
2134 *
2135 * prior_lock_state is the value in the 1st
2136 * word of the lock at the time of a successful
2137 * atomic compare and exchange with the new value...
2138 * it represents the state of the lock before we
2139 * decremented the rw_shared_count or cleared either
2140 * rw_want_upgrade or rw_want_excl and
2141 * the lck_x_waiting bits... since the wrapper
2142 * routine has already changed the state atomically,
2143 * we just need to decide if we should
2144 * wake up anyone and what value to return... we do
2145 * this by examining the state of the lock before
2146 * we changed it
2147 */
2148 static lck_rw_type_t
2149 lck_rw_done_gen(
2150 lck_rw_t *lck,
2151 uint32_t prior_lock_state)
2152 {
2153 lck_rw_word_t fake_lck;
2154 lck_rw_type_t lock_type;
2155 thread_t thread;
2156
2157 /*
2158 * prior_lock_state is a snapshot of the 1st word of the
2159 * lock in question... we fake up a lck_rw_word_t from it
2160 * and carefully avoid accessing anything beyond what's defined
2161 * in the first word of a lck_rw_t
2162 */
2163 fake_lck.data = prior_lock_state;
2164
2165 if (fake_lck.shared_count <= 1) {
2166 if (fake_lck.w_waiting) {
2167 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
2168 }
2169
2170 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
2171 thread_wakeup(LCK_RW_READER_EVENT(lck));
2172 }
2173 }
2174 if (fake_lck.shared_count) {
2175 lock_type = LCK_RW_TYPE_SHARED;
2176 } else {
2177 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2178 }
2179
2180 /* Check if dropping the lock means that we need to unpromote */
2181 thread = current_thread();
2182 if (fake_lck.can_sleep) {
2183 lck_rw_lock_count_dec(thread, lck);
2184 }
2185
2186 #if CONFIG_DTRACE
2187 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2188 #endif
2189
2190 #ifdef DEBUG_RW
2191 remove_held_rwlock(lck, thread, lock_type);
2192 #endif /* DEBUG_RW */
2193 return lock_type;
2194 }
2195
2196 /*!
2197 * @function lck_rw_done
2198 *
2199 * @abstract
2200 * Force unlocks a rw_lock without consistency checks.
2201 *
2202 * @discussion
2203 * Do not use unless you are sure you can do without the consistency checks.
2204 *
2205 * @param lock rw_lock to unlock.
 *
 * @returns LCK_RW_TYPE_SHARED if the lock was dropped from shared mode,
 * LCK_RW_TYPE_EXCLUSIVE if it was dropped from exclusive (or upgrade) mode.
2206 */
2207 lck_rw_type_t
2208 lck_rw_done(
2209 lck_rw_t *lock)
2210 {
2211 uint32_t data, prev;
2212 boolean_t once = FALSE;
2213
2214 #ifdef DEBUG_RW
2215 /*
2216 * Best effort attempt to check that this thread
2217 * is holding the lock.
2218 */
2219 thread_t thread = current_thread();
2220 assert_held_rwlock(lock, thread, 0);
2221 #endif /* DEBUG_RW */
2222 for (;;) {
2223 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
2224 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
2225 atomic_exchange_abort();
2226 lck_rw_interlock_spin(lock);
2227 continue;
2228 }
2229 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
2230 assertf(lock->lck_rw_owner == 0,
2231 "state=0x%x, owner=%p", lock->lck_rw_data,
2232 ctid_get_thread_unsafe(lock->lck_rw_owner));
2233 data -= LCK_RW_SHARED_READER;
2234 if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
2235 goto check_waiters;
2236 }
2237 } else { /* if reader count == 0, must be exclusive lock */
2238 if (data & LCK_RW_WANT_UPGRADE) {
2239 data &= ~(LCK_RW_WANT_UPGRADE);
2240 } else {
2241 if (data & LCK_RW_WANT_EXCL) {
2242 data &= ~(LCK_RW_WANT_EXCL);
2243 } else { /* lock is not 'owned', panic */
2244 panic("Releasing non-exclusive RW lock without a reader refcount!");
2245 }
2246 }
2247 if (!once) {
2248 // Only check for holder and clear it once
2249 assertf(lock->lck_rw_owner == current_thread()->ctid,
2250 "state=0x%x, owner=%p", lock->lck_rw_data,
2251 ctid_get_thread_unsafe(lock->lck_rw_owner));
2252 ordered_store_rw_owner(lock, 0);
2253 once = TRUE;
2254 }
2255 check_waiters:
2256 /*
2257 * test the original values to match what
2258 * lck_rw_done_gen is going to do to determine
2259 * which wakeups need to happen...
2260 *
2261 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
2262 */
2263 if (prev & LCK_RW_W_WAITING) {
2264 data &= ~(LCK_RW_W_WAITING);
2265 if ((prev & LCK_RW_PRIV_EXCL) == 0) {
2266 data &= ~(LCK_RW_R_WAITING);
2267 }
2268 } else {
2269 data &= ~(LCK_RW_R_WAITING);
2270 }
2271 }
2272 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
2273 break;
2274 }
2275 cpu_pause();
2276 }
2277 return lck_rw_done_gen(lock, prev);
2278 }
2279
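/*
 * Illustrative sketch (hypothetical caller, not part of the lock
 * implementation): lck_rw_done() is useful in a common exit path that can
 * be reached with the lock held in either mode; the return value tells the
 * caller which mode was just dropped.  The name is made up for illustration.
 */
#if 0
static void
example_common_exit(lck_rw_t *lock)
{
	lck_rw_type_t held = lck_rw_done(lock);

	if (held == LCK_RW_TYPE_EXCLUSIVE) {
		/* ... writer-only cleanup ... */
	}
}
#endif
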
2280 /*!
2281 * @function lck_rw_unlock_shared
2282 *
2283 * @abstract
2284 * Unlocks a rw_lock previously locked in shared mode.
2285 *
2286 * @discussion
2287 * The same thread that locked the lock needs to unlock it.
2288 *
2289 * @param lck rw_lock held in shared mode to unlock.
2290 */
2291 void
2292 lck_rw_unlock_shared(
2293 lck_rw_t *lck)
2294 {
2295 lck_rw_type_t ret;
2296
2297 assertf(lck->lck_rw_owner == 0,
2298 "state=0x%x, owner=%p", lck->lck_rw_data,
2299 ctid_get_thread_unsafe(lck->lck_rw_owner));
2300 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
2301 ret = lck_rw_done(lck);
2302
2303 if (ret != LCK_RW_TYPE_SHARED) {
2304 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
2305 }
2306 }
2307
2308 /*!
2309 * @function lck_rw_unlock_exclusive
2310 *
2311 * @abstract
2312 * Unlocks a rw_lock previously locked in exclusive mode.
2313 *
2314 * @discussion
2315 * The same thread that locked the lock needs to unlock it.
2316 *
2317 * @param lck rw_lock held in exclusive mode to unlock.
2318 */
2319 void
2320 lck_rw_unlock_exclusive(
2321 lck_rw_t *lck)
2322 {
2323 lck_rw_type_t ret;
2324
2325 assertf(lck->lck_rw_owner == current_thread()->ctid,
2326 "state=0x%x, owner=%p", lck->lck_rw_data,
2327 ctid_get_thread_unsafe(lck->lck_rw_owner));
2328 ret = lck_rw_done(lck);
2329
2330 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
2331 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
2332 }
2333 }
2334
2335 /*!
2336 * @function lck_rw_unlock
2337 *
2338 * @abstract
2339 * Unlocks a rw_lock previously locked with lck_rw_type.
2340 *
2341 * @discussion
2342 * The lock must be unlocked by the same thread that locked it.
2343 * The types of the lock and unlock have to match, unless an upgrade/downgrade was performed while
2344 * holding the lock.
2345 *
2346 * @param lck rw_lock to unlock.
2347 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2348 */
2349 void
2350 lck_rw_unlock(
2351 lck_rw_t *lck,
2352 lck_rw_type_t lck_rw_type)
2353 {
2354 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2355 lck_rw_unlock_shared(lck);
2356 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2357 lck_rw_unlock_exclusive(lck);
2358 } else {
2359 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
2360 }
2361 }
2362
2363 /*!
2364 * @function lck_rw_assert
2365 *
2366 * @abstract
2367 * Asserts the rw_lock is held.
2368 *
2369 * @discussion
2370 * read-write locks do not have a concept of ownership when held in shared mode,
2371 * so this function merely asserts that someone is holding the lock, not necessarily the caller.
2372 * However if rw_lock_debug is on, a best effort mechanism to track the owners is in place, and
2373 * this function can be more accurate.
2374 * Type can be LCK_RW_ASSERT_SHARED, LCK_RW_ASSERT_EXCLUSIVE, LCK_RW_ASSERT_HELD
2375 * or LCK_RW_ASSERT_NOTHELD.
2376 *
2377 * @param lck rw_lock to check.
2378 * @param type assert type
2379 */
2380 void
2381 lck_rw_assert(
2382 lck_rw_t *lck,
2383 unsigned int type)
2384 {
2385 thread_t thread = current_thread();
2386
2387 switch (type) {
2388 case LCK_RW_ASSERT_SHARED:
2389 if ((lck->lck_rw_shared_count != 0) &&
2390 (lck->lck_rw_owner == 0)) {
2391 #if DEBUG_RW
2392 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2393 #endif /* DEBUG_RW */
2394 return;
2395 }
2396 break;
2397 case LCK_RW_ASSERT_EXCLUSIVE:
2398 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2399 (lck->lck_rw_shared_count == 0) &&
2400 (lck->lck_rw_owner == thread->ctid)) {
2401 #if DEBUG_RW
2402 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2403 #endif /* DEBUG_RW */
2404 return;
2405 }
2406 break;
2407 case LCK_RW_ASSERT_HELD:
2408 if (lck->lck_rw_shared_count != 0) {
2409 #if DEBUG_RW
2410 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2411 #endif /* DEBUG_RW */
2412 return; // Held shared
2413 }
2414 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2415 (lck->lck_rw_owner == thread->ctid)) {
2416 #if DEBUG_RW
2417 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2418 #endif /* DEBUG_RW */
2419 return; // Held exclusive
2420 }
2421 break;
2422 case LCK_RW_ASSERT_NOTHELD:
2423 if ((lck->lck_rw_shared_count == 0) &&
2424 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2425 (lck->lck_rw_owner == 0)) {
2426 #ifdef DEBUG_RW
2427 assert_canlock_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2428 #endif /* DEBUG_RW */
2429 return;
2430 }
2431 break;
2432 default:
2433 break;
2434 }
2435 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2436 }
2437
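/*
 * Illustrative sketch (hypothetical caller, not part of the lock
 * implementation): internal helpers commonly assert their locking contract
 * on entry, so violations panic close to the offending call site.  The
 * names are made up for illustration.
 */
#if 0
static void
example_update_locked(lck_rw_t *lock, uint32_t *field, uint32_t value)
{
	/* caller must hold the lock exclusive */
	lck_rw_assert(lock, LCK_RW_ASSERT_EXCLUSIVE);
	*field = value;
}
#endif
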
2438 /*!
2439 * @function kdp_lck_rw_lock_is_acquired_exclusive
2440 *
2441 * @abstract
2442 * Checks if a rw_lock is held exclusively.
2443 *
2444 * @discussion
2445 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2446 *
2447 * @param lck lock to check
2448 *
2449 * @returns TRUE if the lock is held exclusively
2450 */
2451 boolean_t
2452 kdp_lck_rw_lock_is_acquired_exclusive(
2453 lck_rw_t *lck)
2454 {
2455 if (not_in_kdp) {
2456 panic("rw lock exclusive check done outside of kernel debugger");
2457 }
2458 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2459 }
2460
2461 void
2462 kdp_rwlck_find_owner(
2463 __unused struct waitq *waitq,
2464 event64_t event,
2465 thread_waitinfo_t *waitinfo)
2466 {
2467 lck_rw_t *rwlck = NULL;
2468 switch (waitinfo->wait_type) {
2469 case kThreadWaitKernelRWLockRead:
2470 rwlck = READ_EVENT_TO_RWLOCK(event);
2471 break;
2472 case kThreadWaitKernelRWLockWrite:
2473 case kThreadWaitKernelRWLockUpgrade:
2474 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2475 break;
2476 default:
2477 panic("%s was called with an invalid blocking type", __FUNCTION__);
2478 break;
2479 }
2480 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2481 waitinfo->owner = thread_tid(ctid_get_thread(rwlck->lck_rw_owner));
2482 }
2483
2484 /*!
2485 * @function lck_rw_lock_would_yield_shared
2486 *
2487 * @abstract
2488 * Check whether a rw_lock currently held in shared mode would be yielded
2489 *
2490 * @discussion
2491 * This function can be used when lck_rw_lock_yield_shared() would be
2492 * inappropriate due to the need to perform additional housekeeping
2493 * prior to any yield or when the caller may wish to prematurely terminate
2494 * an operation rather than resume it after regaining the lock.
2495 *
2496 * @param lck rw_lock already held in shared mode to yield.
2497 *
2498 * @returns TRUE if the lock would yield, FALSE otherwise
2499 */
2500 bool
2501 lck_rw_lock_would_yield_shared(
2502 lck_rw_t *lck)
2503 {
2504 lck_rw_word_t word;
2505
2506 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2507
2508 word.data = ordered_load_rw(lck);
2509 if (word.want_excl || word.want_upgrade) {
2510 return true;
2511 }
2512
2513 return false;
2514 }
2515
2516 /*!
2517 * @function lck_rw_lock_yield_shared
2518 *
2519 * @abstract
2520 * Yields a rw_lock held in shared mode.
2521 *
2522 * @discussion
2523 * This function can block.
2524 * Yields the lock in case there are writers waiting.
2525 * The yield will unlock, block, and re-lock the lock in shared mode.
2526 *
2527 * @param lck rw_lock already held in shared mode to yield.
2528 * @param force_yield if set to true it will always yield irrespective of the lock status
2529 *
2530 * @returns TRUE if the lock was yielded, FALSE otherwise
2531 */
2532 bool
2533 lck_rw_lock_yield_shared(
2534 lck_rw_t *lck,
2535 boolean_t force_yield)
2536 {
2537 if (lck_rw_lock_would_yield_shared(lck) || force_yield) {
2538 lck_rw_unlock_shared(lck);
2539 mutex_pause(2);
2540 lck_rw_lock_shared(lck);
2541 return true;
2542 }
2543
2544 return false;
2545 }
2546
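/*
 * Illustrative sketch (hypothetical caller, not part of the lock
 * implementation): a long read-side scan that periodically lets waiting
 * writers in.  When lck_rw_lock_yield_shared() returns true the lock was
 * dropped and re-acquired, so any cursor into the protected structure must
 * be re-validated.  The names are made up for illustration.
 */
#if 0
static void
example_scan(lck_rw_t *lock, uint32_t nentries)
{
	lck_rw_lock_shared(lock);
	for (uint32_t i = 0; i < nentries; i++) {
		/* ... examine entry i ... */

		if (lck_rw_lock_yield_shared(lock, FALSE)) {
			/* the lock was dropped and re-taken: re-validate state */
		}
	}
	lck_rw_unlock_shared(lock);
}
#endif
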
2547 /*!
2548 * @function lck_rw_lock_would_yield_exclusive
2549 *
2550 * @abstract
2551 * Check whether a rw_lock currently held in exclusive mode would be yielded
2552 *
2553 * @discussion
2554 * This function can be used when lck_rw_lock_yield_exclusive would be
2555 * inappropriate due to the need to perform additional housekeeping
2556 * prior to any yield or when the caller may wish to prematurely terminate
2557 * an operation rather than resume it after regaining the lock.
2558 *
2559 * @param lck rw_lock already held in exclusive mode to yield.
2560 * @param mode when to yield.
2561 *
2562 * @returns TRUE if the lock would yield, FALSE otherwise
2563 */
2564 bool
2565 lck_rw_lock_would_yield_exclusive(
2566 lck_rw_t *lck,
2567 lck_rw_yield_t mode)
2568 {
2569 lck_rw_word_t word;
2570 bool yield = false;
2571
2572 lck_rw_assert(lck, LCK_RW_ASSERT_EXCLUSIVE);
2573
2574 if (mode == LCK_RW_YIELD_ALWAYS) {
2575 yield = true;
2576 } else {
2577 word.data = ordered_load_rw(lck);
2578 if (word.w_waiting) {
2579 yield = true;
2580 } else if (mode == LCK_RW_YIELD_ANY_WAITER) {
2581 yield = (word.r_waiting != 0);
2582 }
2583 }
2584
2585 return yield;
2586 }
2587
2588 /*!
2589 * @function lck_rw_lock_yield_exclusive
2590 *
2591 * @abstract
2592 * Yields a rw_lock held in exclusive mode.
2593 *
2594 * @discussion
2595 * This function can block.
2596 * Yields the lock in case there are writers waiting.
2597 * The yield will unlock, block, and re-lock the lock in exclusive mode.
2598 *
2599 * @param lck rw_lock already held in exclusive mode to yield.
2600 * @param mode when to yield.
2601 *
2602 * @returns TRUE if the lock was yielded, FALSE otherwise
2603 */
2604 bool
2605 lck_rw_lock_yield_exclusive(
2606 lck_rw_t *lck,
2607 lck_rw_yield_t mode)
2608 {
2609 bool yield = lck_rw_lock_would_yield_exclusive(lck, mode);
2610
2611 if (yield) {
2612 lck_rw_unlock_exclusive(lck);
2613 mutex_pause(2);
2614 lck_rw_lock_exclusive(lck);
2615 }
2616
2617 return yield;
2618 }
2619
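/*
 * Illustrative sketch (hypothetical caller, not part of the lock
 * implementation): a long write-side batch that prefers to terminate early
 * and let the caller resume later, using the would-yield check rather than
 * yielding in place.  The names are made up for illustration.
 */
#if 0
static bool
example_batch_update(lck_rw_t *lock, uint32_t *done, uint32_t total)
{
	lck_rw_lock_exclusive(lock);
	while (*done < total) {
		/* ... update one item ... */
		(*done)++;

		if (lck_rw_lock_would_yield_exclusive(lock, LCK_RW_YIELD_ANY_WAITER)) {
			lck_rw_unlock_exclusive(lock);
			return false;   /* stop early; caller resumes later */
		}
	}
	lck_rw_unlock_exclusive(lock);
	return true;
}
#endif
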
2620 /*!
2621 * @function lck_rw_sleep
2622 *
2623 * @abstract
2624 * Assert_wait on an event while holding the rw_lock.
2625 *
2626 * @discussion
2627 * The flags decide how to re-acquire the lock upon wakeup
2628 * (LCK_SLEEP_SHARED, or LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
2629 * and if the priority needs to be kept boosted until the lock is
2630 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2631 *
2632 * @param lck rw_lock to use to synch the assert_wait.
2633 * @param lck_sleep_action flags.
2634 * @param event event to assert_wait on.
2635 * @param interruptible wait type.
2636 */
2637 wait_result_t
2638 lck_rw_sleep(
2639 lck_rw_t *lck,
2640 lck_sleep_action_t lck_sleep_action,
2641 event_t event,
2642 wait_interrupt_t interruptible)
2643 {
2644 wait_result_t res;
2645 lck_rw_type_t lck_rw_type;
2646 thread_pri_floor_t token;
2647
2648 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2649 panic("Invalid lock sleep action %x", lck_sleep_action);
2650 }
2651
2652 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2653 /*
2654 * Although we are dropping the RW lock, the intent in most cases
2655 * is that this thread remains as an observer, since it may hold
2656 * some secondary resource, but must yield to avoid deadlock. In
2657 * this situation, make sure that the thread is boosted to the
2658 * ceiling while blocked, so that it can re-acquire the
2659 * RW lock at that priority.
2660 */
2661 token = thread_priority_floor_start();
2662 }
2663
2664 res = assert_wait(event, interruptible);
2665 if (res == THREAD_WAITING) {
2666 lck_rw_type = lck_rw_done(lck);
2667 res = thread_block(THREAD_CONTINUE_NULL);
2668 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2669 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2670 lck_rw_lock(lck, lck_rw_type);
2671 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2672 lck_rw_lock_exclusive(lck);
2673 } else {
2674 lck_rw_lock_shared(lck);
2675 }
2676 }
2677 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2678 (void)lck_rw_done(lck);
2679 }
2680
2681 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2682 thread_priority_floor_end(&token);
2683 }
2684
2685 return res;
2686 }
2687
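/*
 * Illustrative sketch (hypothetical caller, not part of the lock
 * implementation): waiting for a condition protected by the lock.  With
 * LCK_SLEEP_DEFAULT the lock is dropped, the thread blocks on the event,
 * and the lock is re-acquired in the mode it was held before returning,
 * so the condition is re-checked in a loop.  The names are made up for
 * illustration.
 */
#if 0
static void
example_wait_for_flag(lck_rw_t *lock, bool *flag)
{
	lck_rw_lock_exclusive(lock);
	while (!*flag) {
		(void)lck_rw_sleep(lock, LCK_SLEEP_DEFAULT, (event_t)flag, THREAD_UNINT);
	}
	/* flag observed true with the lock held exclusive */
	lck_rw_unlock_exclusive(lock);
}
#endif
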
2688 /*!
2689 * @function lck_rw_sleep_deadline
2690 *
2691 * @abstract
2692 * Assert_wait_deadline on an event while holding the rw_lock.
2693 *
2694 * @discussion
2695 * The flags decide how to re-acquire the lock upon wakeup
2696 * (LCK_SLEEP_SHARED, or LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
2697 * and if the priority needs to be kept boosted until the lock is
2698 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2699 *
2700 * @param lck rw_lock to use to synch the assert_wait.
2701 * @param lck_sleep_action flags.
2702 * @param event event to assert_wait on.
2703 * @param interruptible wait type.
2704 * @param deadline maximum time after which the thread is woken up
2705 */
2706 wait_result_t
2707 lck_rw_sleep_deadline(
2708 lck_rw_t *lck,
2709 lck_sleep_action_t lck_sleep_action,
2710 event_t event,
2711 wait_interrupt_t interruptible,
2712 uint64_t deadline)
2713 {
2714 wait_result_t res;
2715 lck_rw_type_t lck_rw_type;
2716 thread_pri_floor_t token;
2717
2718 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2719 panic("Invalid lock sleep action %x", lck_sleep_action);
2720 }
2721
2722 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2723 token = thread_priority_floor_start();
2724 }
2725
2726 res = assert_wait_deadline(event, interruptible, deadline);
2727 if (res == THREAD_WAITING) {
2728 lck_rw_type = lck_rw_done(lck);
2729 res = thread_block(THREAD_CONTINUE_NULL);
2730 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2731 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2732 lck_rw_lock(lck, lck_rw_type);
2733 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2734 lck_rw_lock_exclusive(lck);
2735 } else {
2736 lck_rw_lock_shared(lck);
2737 }
2738 }
2739 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2740 (void)lck_rw_done(lck);
2741 }
2742
2743 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2744 thread_priority_floor_end(&token);
2745 }
2746
2747 return res;
2748 }
2749
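/*
 * Illustrative sketch (hypothetical caller, not part of the lock
 * implementation): a bounded wait on top of lck_rw_sleep_deadline().  The
 * use of clock_interval_to_deadline() for the deadline computation is an
 * assumption about the caller, not something this file requires.  The name
 * is made up for illustration.
 */
#if 0
static wait_result_t
example_wait_with_timeout(lck_rw_t *lock, event_t event)
{
	uint64_t deadline;

	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);     /* ~100ms from now */
	return lck_rw_sleep_deadline(lock, LCK_SLEEP_DEFAULT, event,
	           THREAD_UNINT, deadline);
}
#endif
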
2750 /*
2751 * Reader-writer lock promotion
2752 *
2753 * We support a limited form of reader-writer
2754 * lock promotion whose effects are:
2755 *
2756 * * Qualifying threads have decay disabled
2757 * * Scheduler priority is reset to a floor of
2758 * their statically assigned priority
2759 * or MINPRI_RWLOCK
2760 *
2761 * The rationale is that lck_rw_ts do not have
2762 * a single owner, so we cannot apply a directed
2763 * priority boost from all waiting threads
2764 * to all holding threads without maintaining
2765 * lists of all shared owners and all waiting
2766 * threads for every lock.
2767 *
2768 * Instead (and to preserve the uncontended fast-
2769 * path), acquiring (or attempting to acquire)
2770 * a RW lock in shared or exclusive lock increments
2771 * a per-thread counter. Only if that thread stops
2772 * making forward progress (for instance blocking
2773 * on a mutex, or being preempted) do we consult
2774 * the counter and apply the priority floor.
2775 * When the thread becomes runnable again (or in
2776 * the case of preemption it never stopped being
2777 * runnable), it has the priority boost and should
2778 * be in a good position to run on the CPU and
2779 * release all RW locks (at which point the priority
2780 * boost is cleared).
2781 *
2782 * Care must be taken to ensure that priority
2783 * boosts are not retained indefinitely, since unlike
2784 * mutex priority boosts (where the boost is tied
2785 * to the mutex lifecycle), the boost is tied
2786 * to the thread and independent of any particular
2787 * lck_rw_t. Assertions are in place on return
2788 * to userspace so that the boost is not held
2789 * indefinitely.
2790 *
2791 * The routines that increment/decrement the
2792 * per-thread counter should err on the side of
2793 * incrementing any time a preemption is possible
2794 * and the lock would be visible to the rest of the
2795 * system as held (so it should be incremented before
2796 * interlocks are dropped/preemption is enabled, or
2797 * before a CAS is executed to acquire the lock).
2798 *
2799 */
2800
2801 /*!
2802 * @function lck_rw_clear_promotion
2803 *
2804 * @abstract
2805 * Undo priority promotions when the last rw_lock
2806 * is released by a thread (if a promotion was active).
2807 *
2808 * @param thread thread to demote.
2809 * @param lock object reason for the demotion.
2810 */
2811 __attribute__((noinline))
2812 static void
2813 lck_rw_clear_promotion(thread_t thread, const void *lock)
2814 {
2815 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
2816 spl_t s = splsched();
2817 thread_lock(thread);
2818
2819 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
2820 sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED,
2821 unslide_for_kdebug(lock));
2822 }
2823
2824 thread_unlock(thread);
2825 splx(s);
2826 }
2827
2828 /*!
2829 * @function lck_rw_set_promotion_locked
2830 *
2831 * @abstract
2832 * Callout from context switch if the thread goes
2833 * off core with a positive rwlock_count.
2834 *
2835 * @discussion
2836 * Called at splsched with the thread locked.
2837 *
2838 * @param thread thread to promote.
2839 */
2840 __attribute__((always_inline))
2841 void
2842 lck_rw_set_promotion_locked(thread_t thread)
2843 {
2844 if (LcksOpts & LCK_OPTION_DISABLE_RW_PRIO) {
2845 return;
2846 }
2847
2848 assert(thread->rwlock_count > 0);
2849
2850 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2851 sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
2852 }
2853 }
2854
2855 __attribute__((always_inline))
2856 void
2857 lck_rw_lock_count_inc(thread_t thread, const void *lock __unused)
2858 {
2859 if (thread->rwlock_count++ == 0) {
2860 #if MACH_ASSERT
2861 /*
2862 * Set the ast to check that the
2863 * rwlock_count is going to be set to zero when
2864 * going back to userspace.
2865 * Set it only once when we increment it for the first time.
2866 */
2867 act_set_debug_assert();
2868 #endif
2869 }
2870 }
2871
2872 __abortlike
2873 static void
2874 __lck_rw_lock_count_dec_panic(thread_t thread)
2875 {
2876 panic("rw lock count underflow for thread %p", thread);
2877 }
2878
2879 __attribute__((always_inline))
2880 void
2881 lck_rw_lock_count_dec(thread_t thread, const void *lock)
2882 {
2883 uint32_t rwlock_count = thread->rwlock_count--;
2884
2885 if (rwlock_count == 0) {
2886 __lck_rw_lock_count_dec_panic(thread);
2887 }
2888
2889 if (__probable(rwlock_count == 1)) {
2890 /* sched_flags checked without lock, but will be rechecked while clearing */
2891 if (__improbable(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2892 lck_rw_clear_promotion(thread, lock);
2893 }
2894 }
2895 }
2896