/*
 * Copyright (c) 2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  [email protected]
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
#include <debug.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>

KALLOC_TYPE_DEFINE(KT_LCK_RW, lck_rw_t, KT_PRIV_ACCT);

#define LCK_RW_WRITER_EVENT(lck)        (event_t)((uintptr_t)(lck)+1)
#define LCK_RW_READER_EVENT(lck)        (event_t)((uintptr_t)(lck)+2)
#define WRITE_EVENT_TO_RWLOCK(event)    ((lck_rw_t *)((uintptr_t)(event)-1))
#define READ_EVENT_TO_RWLOCK(event)     ((lck_rw_t *)((uintptr_t)(event)-2))
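/*
 * Illustrative note (not in the original source): waiting writers and
 * readers sleep on distinct event addresses derived from the lock address,
 * so the two macro pairs above are exact inverses of one another:
 *
 *	lck_rw_t *l = ...;
 *	assert(WRITE_EVENT_TO_RWLOCK(LCK_RW_WRITER_EVENT(l)) == l);
 *	assert(READ_EVENT_TO_RWLOCK(LCK_RW_READER_EVENT(l)) == l);
 */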

#if CONFIG_DTRACE
#define DTRACE_RW_SHARED        0x0     //reader
#define DTRACE_RW_EXCL          0x1     //writer
#define DTRACE_NO_FLAG          0x0     //not applicable
#endif /* CONFIG_DTRACE */

#define LCK_RW_LCK_EXCLUSIVE_CODE       0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
#define LCK_RW_LCK_SHARED_CODE          0x102
#define LCK_RW_LCK_SH_TO_EX_CODE        0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE       0x104
#define LCK_RW_LCK_EX_TO_SH_CODE        0x105

#if __x86_64__
#define LCK_RW_LCK_EX_WRITER_SPIN_CODE  0x106
#define LCK_RW_LCK_EX_WRITER_WAIT_CODE  0x107
#define LCK_RW_LCK_EX_READER_SPIN_CODE  0x108
#define LCK_RW_LCK_EX_READER_WAIT_CODE  0x109
#define LCK_RW_LCK_SHARED_SPIN_CODE     0x110
#define LCK_RW_LCK_SHARED_WAIT_CODE     0x111
#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE   0x112
#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE   0x113
#endif

#define lck_rw_ilk_lock(lock)   hw_lock_bit((hw_lock_bit_t *)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
#define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t *)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)

#define ordered_load_rw(lock)                   os_atomic_load(&(lock)->lck_rw_data, compiler_acq_rel)
#define ordered_store_rw(lock, value)           os_atomic_store(&(lock)->lck_rw_data, (value), compiler_acq_rel)
#define ordered_load_rw_owner(lock)             os_atomic_load(&(lock)->lck_rw_owner, compiler_acq_rel)
#define ordered_store_rw_owner(lock, value)     os_atomic_store(&(lock)->lck_rw_owner, (value), compiler_acq_rel)
#ifdef DEBUG_RW
static TUNABLE(bool, lck_rw_recursive_shared_assert_74048094, "lck_rw_recursive_shared_assert", false);
SECURITY_READ_ONLY_EARLY(vm_packing_params_t) rwlde_caller_packing_params =
    VM_PACKING_PARAMS(LCK_RW_CALLER_PACKED);
#define rw_lock_debug_disabled()        ((LcksOpts & disLkRWDebug) == disLkRWDebug)

#define set_rwlde_caller_packed(entry, caller)  ((entry)->rwlde_caller_packed = VM_PACK_POINTER((vm_offset_t)(caller), LCK_RW_CALLER_PACKED))
#define get_rwlde_caller(entry)                 ((void *)VM_UNPACK_POINTER((entry)->rwlde_caller_packed, LCK_RW_CALLER_PACKED))

#endif /* DEBUG_RW */

/*!
 * @function lck_rw_alloc_init
 *
 * @abstract
 * Allocates and initializes a lck_rw_t.
 *
 * @discussion
 * The function can block. See lck_rw_init() for initialization details.
 *
 * @param grp           lock group to associate with the lock.
 * @param attr          lock attribute to initialize the lock.
 *
 * @returns NULL or the allocated lock
 */
lck_rw_t *
lck_rw_alloc_init(
    lck_grp_t *grp,
    lck_attr_t *attr)
{
    lck_rw_t *lck;

    lck = zalloc_flags(KT_LCK_RW, Z_WAITOK | Z_ZERO);
    lck_rw_init(lck, grp, attr);
    return lck;
}
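
/*
 * Example (illustrative sketch, not part of the original file): typical
 * lifecycle of a dynamically allocated rw lock. "my_grp" is a hypothetical
 * lock group owned by the caller.
 *
 *	lck_grp_t *my_grp = lck_grp_alloc_init("my-subsystem", LCK_GRP_ATTR_NULL);
 *	lck_rw_t  *lck    = lck_rw_alloc_init(my_grp, LCK_ATTR_NULL);
 *
 *	lck_rw_lock_exclusive(lck);
 *	... exclusive critical section ...
 *	lck_rw_unlock_exclusive(lck);
 *
 *	lck_rw_free(lck, my_grp);
 */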

/*!
 * @function lck_rw_init
 *
 * @abstract
 * Initializes a lck_rw_t.
 *
 * @discussion
 * Usage statistics for the lock will be added to the lock group provided.
 *
 * The lock attribute can be used to specify the lock contention behaviour.
 * RW_WRITER_PRIORITY is the default behaviour (LCK_ATTR_NULL defaults to
 * RW_WRITER_PRIORITY) and lck_attr_rw_shared_priority() can be used to set
 * the behaviour to RW_SHARED_PRIORITY.
 *
 * RW_WRITER_PRIORITY gives priority to the writers upon contention with the
 * readers: if the lock is held and a writer starts waiting for the lock,
 * readers will not be able to acquire the lock until all writers stop
 * contending. Readers could potentially starve.
 * RW_SHARED_PRIORITY gives priority to the readers upon contention with the
 * writers: unless the lock is held in exclusive mode, readers will always be
 * able to acquire the lock. Readers can lock a shared lock even if there are
 * writers waiting. Writers could potentially starve.
 *
 * @param lck           lock to initialize.
 * @param grp           lock group to associate with the lock.
 * @param attr          lock attribute to initialize the lock.
 */
void
lck_rw_init(
    lck_rw_t *lck,
    lck_grp_t *grp,
    lck_attr_t *attr)
{
    if (attr == LCK_ATTR_NULL) {
        attr = &LockDefaultLckAttr;
    }
    memset(lck, 0, sizeof(lck_rw_t));
    lck->lck_rw_can_sleep = TRUE;
    if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0) {
        lck->lck_rw_priv_excl = TRUE;
    }
    lck_grp_reference(grp, &grp->lck_grp_rwcnt);
}
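
/*
 * Example (illustrative sketch): initializing an embedded lock with
 * RW_SHARED_PRIORITY instead of the default writer priority. The object,
 * field, and group names are hypothetical.
 *
 *	lck_attr_t *attr = lck_attr_alloc_init();
 *	lck_attr_rw_shared_priority(attr);
 *	lck_rw_init(&my_object->lock, my_grp, attr);
 *	lck_attr_free(attr);
 */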

/*!
 * @function lck_rw_free
 *
 * @abstract
 * Frees a rw_lock previously allocated with lck_rw_alloc_init().
 *
 * @discussion
 * The lock must not be held by any thread.
 *
 * @param lck           rw_lock to free.
 */
void
lck_rw_free(
    lck_rw_t *lck,
    lck_grp_t *grp)
{
    lck_rw_destroy(lck, grp);
    zfree(KT_LCK_RW, lck);
}

/*!
 * @function lck_rw_destroy
 *
 * @abstract
 * Destroys a rw_lock previously initialized with lck_rw_init().
 *
 * @discussion
 * The lock must not be held by any thread.
 *
 * @param lck           rw_lock to destroy.
 */
void
lck_rw_destroy(
    lck_rw_t *lck,
    lck_grp_t *grp)
{
    if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
        panic("Destroying previously destroyed lock %p", lck);
    }
    lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);

    lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
    lck_grp_deallocate(grp, &grp->lck_grp_rwcnt);
}

#ifdef DEBUG_RW

/*
 * Best effort mechanism to debug rw_locks.
 *
 * This mechanism is in addition to the owner checks. The owner is set
 * only when the lock is held in exclusive mode, so those checks do not
 * cover the cases in which the lock is held in shared mode.
 *
 * This mechanism tentatively stores the rw_lock acquired and its debug
 * information on the thread struct.
 * Only up to LCK_RW_EXPECTED_MAX_NUMBER rw_lock debug entries can be stored.
 *
 * NOTE: LCK_RW_EXPECTED_MAX_NUMBER is the expected number of rw_locks held
 * at the same time. If a thread holds more than this number of rw_locks we
 * will start losing debug information.
 * Increasing LCK_RW_EXPECTED_MAX_NUMBER will increase the probability that
 * we store the debug information, but it will require more memory per thread
 * and longer lock/unlock times.
 *
 * If an empty slot is found for the debug information, we record the lock;
 * otherwise we set the overflow threshold flag.
 *
 * Once the overflow threshold is reached we might stop asserting, because we
 * can no longer be sure whether the lock was acquired or not.
 *
 * Even after reaching the overflow threshold, we try to store the debug
 * information for newly acquired locks. This can be useful in core dumps to
 * debug possible returns to userspace without unlocking and to find possible
 * readers holding the lock.
 */
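/*
 * Entry encoding used by the functions below (summarized here for
 * readability): each struct rw_lock_debug_entry tracks one lock;
 * rwlde_mode_count == -1 means held in exclusive mode, and
 * rwlde_mode_count == N > 0 means held N times in shared mode.
 * An empty slot has rwlde_lock == NULL.
 */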
__startup_func
static void
rw_lock_init(void)
{
    if (kern_feature_override(KF_RW_LOCK_DEBUG_OVRD)) {
        LcksOpts |= disLkRWDebug;
    }
}
STARTUP(LOCKS_EARLY, STARTUP_RANK_FIRST, rw_lock_init);

static inline struct rw_lock_debug_entry *
find_lock_in_savedlocks(lck_rw_t *lock, rw_lock_debug_t *rw_locks_held)
{
    int i;
    for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
        struct rw_lock_debug_entry *existing = &rw_locks_held->rwld_locks[i];
        if (existing->rwlde_lock == lock) {
            return existing;
        }
    }

    return NULL;
}

__abortlike
static void
rwlock_slot_panic(rw_lock_debug_t *rw_locks_held)
{
    panic("No empty slot found in %p slot_used %d", rw_locks_held, rw_locks_held->rwld_locks_saved);
}

static inline struct rw_lock_debug_entry *
find_empty_slot(rw_lock_debug_t *rw_locks_held)
{
    int i;
    for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
        struct rw_lock_debug_entry *entry = &rw_locks_held->rwld_locks[i];
        if (entry->rwlde_lock == NULL) {
            return entry;
        }
    }
    rwlock_slot_panic(rw_locks_held);
}

__abortlike
static void
canlock_rwlock_panic(lck_rw_t *lock, thread_t thread, struct rw_lock_debug_entry *entry)
{
    panic("RW lock %p already held by %p caller %p mode_count %d state 0x%x owner 0x%p ",
        lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
        ordered_load_rw(lock), ordered_load_rw_owner(lock));
}

static inline void
assert_canlock_rwlock(lck_rw_t *lock, thread_t thread, lck_rw_type_t type)
{
    rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

    if (__probable(rw_lock_debug_disabled() || (rw_locks_held->rwld_locks_acquired == 0))) {
        //no locks saved, safe to lock
        return;
    }

    struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
    if (__improbable(entry != NULL)) {
        boolean_t can_be_shared_recursive;
        if (lck_rw_recursive_shared_assert_74048094) {
            can_be_shared_recursive = (lock->lck_rw_priv_excl == 0);
        } else {
            /*
             * rw_lock_shared is currently called recursively;
             * until that code is fixed, allow recursive locking
             * in shared mode.
             */
            can_be_shared_recursive = TRUE;
        }
        if ((type == LCK_RW_TYPE_SHARED) && can_be_shared_recursive && entry->rwlde_mode_count >= 1) {
            return;
        }
        canlock_rwlock_panic(lock, thread, entry);
    }
}

__abortlike
static void
held_rwlock_notheld_panic(lck_rw_t *lock, thread_t thread)
{
    panic("RW lock %p not held by %p", lock, thread);
}

__abortlike
static void
held_rwlock_notheld_with_info_panic(lck_rw_t *lock, thread_t thread, lck_rw_type_t type, struct rw_lock_debug_entry *entry)
{
    if (type == LCK_RW_TYPE_EXCLUSIVE) {
        panic("RW lock %p not held in exclusive by %p caller %p read %d state 0x%x owner 0x%p ",
            lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
            ordered_load_rw(lock), ordered_load_rw_owner(lock));
    } else {
        panic("RW lock %p not held in shared by %p caller %p read %d state 0x%x owner 0x%p ",
            lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
            ordered_load_rw(lock), ordered_load_rw_owner(lock));
    }
}

static inline void
assert_held_rwlock(lck_rw_t *lock, thread_t thread, lck_rw_type_t type)
{
    rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

    if (__probable(rw_lock_debug_disabled())) {
        return;
    }

    if (__improbable(rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_locks_saved == 0)) {
        if (rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_overflow == 0) {
            held_rwlock_notheld_panic(lock, thread);
        }
        return;
    }

    struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
    if (__probable(entry != NULL)) {
        if (type == LCK_RW_TYPE_EXCLUSIVE && entry->rwlde_mode_count != -1) {
            held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
        } else {
            if (type == LCK_RW_TYPE_SHARED && entry->rwlde_mode_count <= 0) {
                held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
            }
        }
    } else {
        if (rw_locks_held->rwld_overflow == 0) {
            held_rwlock_notheld_panic(lock, thread);
        }
    }
}

static inline void
change_held_rwlock(lck_rw_t *lock, thread_t thread, lck_rw_type_t typeFrom, void *caller)
{
    rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

    if (__probable(rw_lock_debug_disabled())) {
        return;
    }

    if (__improbable(rw_locks_held->rwld_locks_saved == 0)) {
        if (rw_locks_held->rwld_overflow == 0) {
            held_rwlock_notheld_panic(lock, thread);
        }
        return;
    }

    struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
    if (__probable(entry != NULL)) {
        if (typeFrom == LCK_RW_TYPE_SHARED) {
            //We are upgrading
            assertf(entry->rwlde_mode_count == 1,
                "RW lock %p not held by a single shared when upgrading "
                "by %p caller %p read %d state 0x%x owner 0x%p ",
                lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
                ordered_load_rw(lock), ordered_load_rw_owner(lock));
            entry->rwlde_mode_count = -1;
            set_rwlde_caller_packed(entry, caller);
        } else {
            //We are downgrading
            assertf(entry->rwlde_mode_count == -1,
                "RW lock %p not held in write mode when downgrading "
                "by %p caller %p read %d state 0x%x owner 0x%p ",
                lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
                ordered_load_rw(lock), ordered_load_rw_owner(lock));
            entry->rwlde_mode_count = 1;
            set_rwlde_caller_packed(entry, caller);
        }
        return;
    }

    if (rw_locks_held->rwld_overflow == 0) {
        held_rwlock_notheld_panic(lock, thread);
    }

    if (rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER) {
        //array is full
        return;
    }

    struct rw_lock_debug_entry *null_entry = find_empty_slot(rw_locks_held);
    null_entry->rwlde_lock = lock;
    set_rwlde_caller_packed(null_entry, caller);
    if (typeFrom == LCK_RW_TYPE_SHARED) {
        null_entry->rwlde_mode_count = -1;
    } else {
        null_entry->rwlde_mode_count = 1;
    }
    rw_locks_held->rwld_locks_saved++;
}

__abortlike
static void
add_held_rwlock_too_many_panic(thread_t thread)
{
    panic("RW lock too many rw locks held, rwld_locks_acquired maxed out for thread %p", thread);
}

static inline void
add_held_rwlock(lck_rw_t *lock, thread_t thread, lck_rw_type_t type, void *caller)
{
    rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
    struct rw_lock_debug_entry *null_entry;

    if (__probable(rw_lock_debug_disabled())) {
        return;
    }

    if (__improbable(rw_locks_held->rwld_locks_acquired == UINT32_MAX)) {
        add_held_rwlock_too_many_panic(thread);
    }
    rw_locks_held->rwld_locks_acquired++;

    if (type == LCK_RW_TYPE_EXCLUSIVE) {
        if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
            //array is full
            rw_locks_held->rwld_overflow = 1;
            return;
        }
        null_entry = find_empty_slot(rw_locks_held);
        null_entry->rwlde_lock = lock;
        set_rwlde_caller_packed(null_entry, caller);
        null_entry->rwlde_mode_count = -1;
        rw_locks_held->rwld_locks_saved++;
        return;
    } else {
        if (__probable(rw_locks_held->rwld_locks_saved == 0)) {
            //array is empty
            goto add_shared;
        }

        boolean_t allow_shared_recursive;
        if (lck_rw_recursive_shared_assert_74048094) {
            allow_shared_recursive = (lock->lck_rw_priv_excl == 0);
        } else {
            allow_shared_recursive = TRUE;
        }
        if (allow_shared_recursive) {
            //It could be already locked in shared mode
            struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
            if (entry != NULL) {
                assert(entry->rwlde_mode_count > 0);
                assertf(entry->rwlde_mode_count != INT8_MAX,
                    "RW lock %p with too many recursive shared held "
                    "from %p caller %p read %d state 0x%x owner 0x%p",
                    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
                    ordered_load_rw(lock), ordered_load_rw_owner(lock));
                entry->rwlde_mode_count += 1;
                return;
            }
        }

        //none of the locks were a match
        //try to add a new entry
        if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
            //array is full
            rw_locks_held->rwld_overflow = 1;
            return;
        }

add_shared:
        null_entry = find_empty_slot(rw_locks_held);
        null_entry->rwlde_lock = lock;
        set_rwlde_caller_packed(null_entry, caller);
        null_entry->rwlde_mode_count = 1;
        rw_locks_held->rwld_locks_saved++;
    }
}

static inline void
remove_held_rwlock(lck_rw_t *lock, thread_t thread, lck_rw_type_t type)
{
    rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

    if (__probable(rw_lock_debug_disabled())) {
        return;
    }

    if (__improbable(rw_locks_held->rwld_locks_acquired == 0)) {
        return;
    }
    rw_locks_held->rwld_locks_acquired--;

    if (rw_locks_held->rwld_locks_saved == 0) {
        assert(rw_locks_held->rwld_overflow == 1);
        goto out;
    }

    struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
    if (__probable(entry != NULL)) {
        if (type == LCK_RW_TYPE_EXCLUSIVE) {
            assert(entry->rwlde_mode_count == -1);
            entry->rwlde_mode_count = 0;
        } else {
            assert(entry->rwlde_mode_count > 0);
            entry->rwlde_mode_count--;
            if (entry->rwlde_mode_count > 0) {
                goto out;
            }
        }
        entry->rwlde_caller_packed = 0;
        entry->rwlde_lock = NULL;
        rw_locks_held->rwld_locks_saved--;
    } else {
        assert(rw_locks_held->rwld_overflow == 1);
    }

out:
    if (rw_locks_held->rwld_locks_acquired == 0) {
        rw_locks_held->rwld_overflow = 0;
    }
    return;
}
#endif /* DEBUG_RW */

/*
 * We disable interrupts while holding the RW interlock to prevent an
 * interrupt from exacerbating hold time.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static inline boolean_t
lck_interlock_lock(
    lck_rw_t *lck)
{
    boolean_t istate;

    istate = ml_set_interrupts_enabled(FALSE);
    lck_rw_ilk_lock(lck);
    return istate;
}

static inline void
lck_interlock_unlock(
    lck_rw_t *lck,
    boolean_t istate)
{
    lck_rw_ilk_unlock(lck);
    ml_set_interrupts_enabled(istate);
}

static inline void
lck_rw_inc_thread_count(
    thread_t thread)
{
    __assert_only uint32_t prev_rwlock_count;

    prev_rwlock_count = thread->rwlock_count++;
#if MACH_ASSERT
    /*
     * Set the ast to check that the
     * rwlock_count is going to be set to zero when
     * going back to userspace.
     * Set it only once when we increment it for the first time.
     */
    if (prev_rwlock_count == 0) {
        act_set_debug_assert();
    }
#endif
}

/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
static inline uint64_t
lck_rw_deadline_for_spin(
    lck_rw_t *lck)
{
    lck_rw_word_t word;

    word.data = ordered_load_rw(lck);
    if (word.can_sleep) {
        if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
            /*
             * there are already threads waiting on this lock... this
             * implies that they have spun beyond their deadlines waiting for
             * the desired state to show up so we will not bother spinning at this time...
             * or
             * the current number of threads sharing this lock exceeds our capacity to run them
             * concurrently and since all states we're going to spin for require the rw_shared_count
             * to be at 0, we'll not bother spinning since the latency for this to happen is
             * unpredictable...
             */
            return mach_absolute_time();
        }
        return mach_absolute_time() + os_atomic_load(&MutexSpin, relaxed);
    } else {
        return mach_absolute_time() + (100000LL * 1000000000LL);
    }
}

/*
 * This inline is used when busy-waiting for an rw lock.
 * If interrupts were disabled when the lock primitive was called,
 * on x86 we poll the IPI handler for pending TLB flushes.
 */
static inline void
lck_rw_lock_pause(
    boolean_t interrupts_enabled)
{
#if __x86_64__
    if (!interrupts_enabled) {
        handle_pending_TLB_flushes();
    }
    cpu_pause();
#else
    (void) interrupts_enabled;
    wait_for_event();
#endif
}

static boolean_t
lck_rw_drain_status(
    lck_rw_t *lock,
    uint32_t status_mask,
    boolean_t wait)
{
    uint64_t deadline = 0;
    uint32_t data;
    boolean_t istate = FALSE;

    if (wait) {
        deadline = lck_rw_deadline_for_spin(lock);
#if __x86_64__
        istate = ml_get_interrupts_enabled();
#endif
    }

    for (;;) {
#if __x86_64__
        data = os_atomic_load(&lock->lck_rw_data, relaxed);
#else
        data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
#endif
        if ((data & status_mask) == 0) {
            break;
        }
        if (wait) {
            lck_rw_lock_pause(istate);
        } else {
            atomic_exchange_abort();
        }
        if (!wait || (mach_absolute_time() >= deadline)) {
            return FALSE;
        }
    }
    atomic_exchange_abort();
    return TRUE;
}

/*
 * Spin while interlock is held.
 */
static inline void
lck_rw_interlock_spin(
    lck_rw_t *lock)
{
    uint32_t data, prev;

    for (;;) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_relaxed);
        if (data & LCK_RW_INTERLOCK) {
#if __x86_64__
            cpu_pause();
#else
            wait_for_event();
#endif
        } else {
            atomic_exchange_abort();
            return;
        }
    }
}

#define LCK_RW_GRAB_WANT        0
#define LCK_RW_GRAB_SHARED      1

static boolean_t
lck_rw_grab(
    lck_rw_t *lock,
    int mode,
    boolean_t wait)
{
    uint64_t deadline = 0;
    uint32_t data, prev;
    boolean_t do_exch, istate = FALSE;

    if (wait) {
        deadline = lck_rw_deadline_for_spin(lock);
#if __x86_64__
        istate = ml_get_interrupts_enabled();
#endif
    }

    for (;;) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        do_exch = FALSE;
        if (mode == LCK_RW_GRAB_WANT) {
            if ((data & LCK_RW_WANT_EXCL) == 0) {
                data |= LCK_RW_WANT_EXCL;
                do_exch = TRUE;
            }
        } else { // LCK_RW_GRAB_SHARED
            if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
                (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
                data += LCK_RW_SHARED_READER;
                do_exch = TRUE;
            }
        }
        if (do_exch) {
            if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
                return TRUE;
            }
        } else {
            if (wait) {
                lck_rw_lock_pause(istate);
            } else {
                atomic_exchange_abort();
            }
            if (!wait || (mach_absolute_time() >= deadline)) {
                return FALSE;
            }
        }
    }
}

static void
lck_rw_lock_exclusive_gen(
    lck_rw_t *lock)
{
    __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
    lck_rw_word_t word;
    int slept = 0;
    boolean_t gotlock = 0;
    boolean_t not_shared_or_upgrade = 0;
    wait_result_t res = 0;
    boolean_t istate;

#if CONFIG_DTRACE
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
    uint64_t wait_interval = 0;
    int readers_at_sleep = 0;
#endif

    __assert_only thread_t owner = ordered_load_rw_owner(lock);
    assertf(owner != current_thread(), "Lock already held state=0x%x, owner=%p",
        ordered_load_rw(lock), owner);

#ifdef DEBUG_RW
    /*
     * Best effort attempt to check that this thread
     * is not already holding the lock (this checks read mode too).
     */
    assert_canlock_rwlock(lock, current_thread(), LCK_RW_TYPE_EXCLUSIVE);
#endif /* DEBUG_RW */

    /*
     * Try to acquire the lck_rw_want_excl bit.
     */
    while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {
#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
            dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lock->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

        gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);

        if (gotlock) {
            break;
        }
        /*
         * if we get here, the deadline has expired w/o us
         * being able to grab the lock exclusively
         * check to see if we're allowed to do a thread_block
         */
        word.data = ordered_load_rw(lock);
        if (word.can_sleep) {
            istate = lck_interlock_lock(lock);
            word.data = ordered_load_rw(lock);

            if (word.want_excl) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

                word.w_waiting = 1;
                ordered_store_rw(lock, word.data);

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                res = assert_wait(LCK_RW_WRITER_EVENT(lock),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lock, istate);
                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
            } else {
                word.want_excl = 1;
                ordered_store_rw(lock, word.data);
                lck_interlock_unlock(lock, istate);
                break;
            }
        }
    }
    /*
     * Wait for readers (and upgrades) to finish...
     */
    while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {
#if CONFIG_DTRACE
        /*
         * Either sleeping or spinning is happening, start
         * a timing of our delay interval now. If we set it
         * to -1 we don't have accurate data so we cannot later
         * decide to record a dtrace spin or sleep event.
         */
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
            dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lock->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

        not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);

        if (not_shared_or_upgrade) {
            break;
        }
        /*
         * if we get here, the deadline has expired w/o us
         * being able to grab the lock exclusively
         * check to see if we're allowed to do a thread_block
         */
        word.data = ordered_load_rw(lock);
        if (word.can_sleep) {
            istate = lck_interlock_lock(lock);
            word.data = ordered_load_rw(lock);

            if (word.shared_count != 0 || word.want_upgrade) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

                word.w_waiting = 1;
                ordered_store_rw(lock, word.data);

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                res = assert_wait(LCK_RW_WRITER_EVENT(lock),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lock, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
            } else {
                lck_interlock_unlock(lock, istate);
                /*
                 * must own the lock now, since we checked for
                 * readers or upgrade owner behind the interlock
                 * no need for a call to 'lck_rw_drain_status'
                 */
                break;
            }
        }
    }

#if CONFIG_DTRACE
    /*
     * Decide what latencies we suffered that are Dtrace events.
     * If we have set wait_interval, then we either spun or slept.
     * At least we get out from under the interlock before we record
     * which is the best we can do here to minimize the impact
     * of the tracing.
     * If we have set wait_interval to -1, then dtrace was not enabled when we
     * started sleeping/spinning so we don't record this event.
     */
    if (dtrace_ls_enabled == TRUE) {
        if (slept == 0) {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
                mach_absolute_time() - wait_interval, 1);
        } else {
            /*
             * For the blocking case, we also record if when we blocked
             * it was held for read or write, and how many readers.
             * Notice that above we recorded this before we dropped
             * the interlock so the count is accurate.
             */
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
                mach_absolute_time() - wait_interval, 1,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
#endif /* CONFIG_DTRACE */
}

#define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
        (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
        LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))

/*!
 * @function lck_rw_lock_exclusive_check_contended
 *
 * @abstract
 * Locks a rw_lock in exclusive mode.
 *
 * @discussion
 * This routine IS EXPERIMENTAL.
 * It's only used for the vm object lock, and use for other subsystems is UNSUPPORTED.
 * Note that the return value is ONLY A HEURISTIC w.r.t. the lock's contention.
 *
 * @param lock          rw_lock to lock.
 *
 * @returns TRUE if the thread spun or blocked while attempting to acquire the lock, FALSE
 *          otherwise.
 */
bool
lck_rw_lock_exclusive_check_contended(
    lck_rw_t *lock)
{
    thread_t thread = current_thread();
    bool contended = false;

    if (lock->lck_rw_can_sleep) {
        lck_rw_inc_thread_count(thread);
    } else if (get_preemption_level() == 0) {
        panic("Taking non-sleepable RW lock with preemption enabled");
    }

    if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
#if CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
    } else {
        contended = true;
        lck_rw_lock_exclusive_gen(lock);
    }
    __assert_only thread_t owner = ordered_load_rw_owner(lock);
    assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);

    ordered_store_rw_owner(lock, thread);

#ifdef DEBUG_RW
    add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, __builtin_return_address(0));
#endif /* DEBUG_RW */
    return contended;
}
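
/*
 * Example (illustrative sketch): how a caller might consume the contention
 * heuristic, e.g. to feed a stats counter. The object and counter names are
 * hypothetical.
 *
 *	if (lck_rw_lock_exclusive_check_contended(&obj->lock)) {
 *		obj->contended_acquires++;	// we spun or blocked
 *	}
 *	... exclusive critical section ...
 *	lck_rw_unlock_exclusive(&obj->lock);
 */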

__attribute__((always_inline))
static void
lck_rw_lock_exclusive_internal_inline(
    lck_rw_t *lock,
    void *caller)
{
#pragma unused(caller)
    thread_t thread = current_thread();

    if (lock->lck_rw_can_sleep) {
        lck_rw_inc_thread_count(thread);
    } else if (get_preemption_level() == 0) {
        panic("Taking non-sleepable RW lock with preemption enabled");
    }

    if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
#if CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
    } else {
        lck_rw_lock_exclusive_gen(lock);
    }

    __assert_only thread_t owner = ordered_load_rw_owner(lock);
    assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);

    ordered_store_rw_owner(lock, thread);

#if DEBUG_RW
    add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
#endif /* DEBUG_RW */
}

__attribute__((noinline))
static void
lck_rw_lock_exclusive_internal(
    lck_rw_t *lock,
    void *caller)
{
    lck_rw_lock_exclusive_internal_inline(lock, caller);
}

/*!
 * @function lck_rw_lock_exclusive
 *
 * @abstract
 * Locks a rw_lock in exclusive mode.
 *
 * @discussion
 * This function can block.
 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
 * can acquire it in exclusive mode.
 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
 *
 * @param lock          rw_lock to lock.
 */
void
lck_rw_lock_exclusive(
    lck_rw_t *lock)
{
    lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0));
}
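
/*
 * Example (illustrative sketch): exclusive acquisition around a writer-side
 * update. Names are hypothetical.
 *
 *	lck_rw_lock_exclusive(&table->lock);
 *	table->generation++;
 *	... mutate the protected structure ...
 *	lck_rw_unlock_exclusive(&table->lock);
 */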

/*
 * Routine: lck_rw_lock_shared_gen
 * Function:
 *	Fast path code has determined that this lock
 *	is held exclusively... this is where we spin/block
 *	until we can acquire the lock in the shared mode
 */
static void
lck_rw_lock_shared_gen(
    lck_rw_t *lck)
{
    __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
    lck_rw_word_t word;
    boolean_t gotlock = 0;
    int slept = 0;
    wait_result_t res = 0;
    boolean_t istate;

#if CONFIG_DTRACE
    uint64_t wait_interval = 0;
    int readers_at_sleep = 0;
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
#endif /* CONFIG_DTRACE */

    __assert_only thread_t owner = ordered_load_rw_owner(lck);
    assertf(owner != current_thread(), "Lock already held state=0x%x, owner=%p",
        ordered_load_rw(lck), owner);
#ifdef DEBUG_RW
    /*
     * Best effort attempt to check that this thread
     * is not already holding the lock in shared mode.
     */
    assert_canlock_rwlock(lck, current_thread(), LCK_RW_TYPE_SHARED);
#endif

    while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
            dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lck->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
            trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);

        gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
            trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);

        if (gotlock) {
            break;
        }
        /*
         * if we get here, the deadline has expired w/o us
         * being able to grab the lock for read
         * check to see if we're allowed to do a thread_block
         */
        if (lck->lck_rw_can_sleep) {
            istate = lck_interlock_lock(lck);

            word.data = ordered_load_rw(lck);
            if ((word.want_excl || word.want_upgrade) &&
                ((word.shared_count == 0) || word.priv_excl)) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
                    trace_lck, word.want_excl, word.want_upgrade, 0, 0);

                word.r_waiting = 1;
                ordered_store_rw(lck, word.data);

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
                res = assert_wait(LCK_RW_READER_EVENT(lck),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lck, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
                    trace_lck, res, slept, 0, 0);
            } else {
                word.shared_count++;
                ordered_store_rw(lck, word.data);
                lck_interlock_unlock(lck, istate);
                break;
            }
        }
    }

#if CONFIG_DTRACE
    if (dtrace_ls_enabled == TRUE) {
        if (slept == 0) {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
        } else {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
                mach_absolute_time() - wait_interval, 0,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif /* CONFIG_DTRACE */
}

__attribute__((always_inline))
static void
lck_rw_lock_shared_internal_inline(
    lck_rw_t *lock,
    void *caller)
{
#pragma unused(caller)

    uint32_t data, prev;
    thread_t thread = current_thread();
    __assert_only thread_t owner;
#ifdef DEBUG_RW
    boolean_t check_canlock = TRUE;
#endif

    if (lock->lck_rw_can_sleep) {
        lck_rw_inc_thread_count(thread);
    } else if (get_preemption_level() == 0) {
        panic("Taking non-sleepable RW lock with preemption enabled");
    }

    for (;;) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
        if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
            atomic_exchange_abort();
            lck_rw_lock_shared_gen(lock);
            goto locked;
        }
#ifdef DEBUG_RW
        if ((data & LCK_RW_SHARED_MASK) == 0) {
            /*
             * If the lock is uncontended,
             * we do not need to check if we can lock it
             */
            check_canlock = FALSE;
        }
#endif
        data += LCK_RW_SHARED_READER;
        if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
            break;
        }
        cpu_pause();
    }
#ifdef DEBUG_RW
    if (check_canlock) {
        /*
         * Best effort attempt to check that this thread
         * is not already holding the lock (this checks read mode too).
         */
        assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
    }
#endif
locked:
    owner = ordered_load_rw_owner(lock);
    assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */

#ifdef DEBUG_RW
    add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
#endif /* DEBUG_RW */
}

__attribute__((noinline))
static void
lck_rw_lock_shared_internal(
    lck_rw_t *lock,
    void *caller)
{
    lck_rw_lock_shared_internal_inline(lock, caller);
}
1300
1301 /*!
1302 * @function lck_rw_lock_shared
1303 *
1304 * @abstract
1305 * Locks a rw_lock in shared mode.
1306 *
1307 * @discussion
1308 * This function can block.
1309 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1310 * can acquire it in exclusive mode.
1311 * If the lock is held in shared mode and there are no writers waiting, a reader will be able to acquire
1312 * the lock without waiting.
1313 * If the lock is held in shared mode and there is at least a writer waiting, a reader will wait
1314 * for all the writers to make progress if the lock was initialized with the default settings. Instead if
1315 * RW_SHARED_PRIORITY was selected at initialization time, a reader will never wait if the lock is held
1316 * in shared mode.
1317 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1318 *
1319 * @param lock rw_lock to lock.
1320 */
1321 void
lck_rw_lock_shared(lck_rw_t * lock)1322 lck_rw_lock_shared(
1323 lck_rw_t *lock)
1324 {
1325 lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0));
1326 }
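
/*
 * Example (illustrative sketch): shared acquisition on the reader side.
 * Multiple threads may run this concurrently. Names are hypothetical.
 *
 *	lck_rw_lock_shared(&table->lock);
 *	value = table->entries[i];
 *	lck_rw_unlock_shared(&table->lock);
 */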

/*
 * Routine: lck_rw_lock_shared_to_exclusive_failure
 * Function:
 *	Fast path code has already dropped our read
 *	count and determined that someone else owns 'lck_rw_want_upgrade'
 *	if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
 *	all we need to do here is determine if a wakeup is needed
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
    lck_rw_t *lck,
    uint32_t prior_lock_state)
{
    thread_t thread = current_thread();
    uint32_t rwlock_count;

    if ((prior_lock_state & LCK_RW_W_WAITING) &&
        ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
        /*
         * Someone else has requested upgrade.
         * Since we've released the read lock, wake
         * him up if he's blocked waiting
         */
        thread_wakeup(LCK_RW_WRITER_EVENT(lck));
    }

    /* Check if dropping the lock means that we need to unpromote */
    if (lck->lck_rw_can_sleep) {
        rwlock_count = thread->rwlock_count--;
    } else {
        rwlock_count = UINT32_MAX;
    }

    if (rwlock_count == 0) {
        panic("rw lock count underflow for thread %p", thread);
    }

    if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
        /* sched_flags checked without lock, but will be rechecked while clearing */
        lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
    }

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
        VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

#ifdef DEBUG_RW
    remove_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
#endif /* DEBUG_RW */

    return FALSE;
}

/*
 * Routine: lck_rw_lock_shared_to_exclusive_success
 * Function:
 *	the fast path code has already dropped our read
 *	count and successfully acquired 'lck_rw_want_upgrade'
 *	we just need to wait for the rest of the readers to drain
 *	and then we can return as the exclusive holder of this lock
 */
static void
lck_rw_lock_shared_to_exclusive_success(
    lck_rw_t *lock)
{
    __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
    int slept = 0;
    lck_rw_word_t word;
    wait_result_t res;
    boolean_t istate;
    boolean_t not_shared;

#if CONFIG_DTRACE
    uint64_t wait_interval = 0;
    int readers_at_sleep = 0;
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
#endif

    while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {
        word.data = ordered_load_rw(lock);
#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
            dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = word.shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
            trace_lck, word.shared_count, 0, 0, 0);

        not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
            trace_lck, lock->lck_rw_shared_count, 0, 0, 0);

        if (not_shared) {
            break;
        }

        /*
         * if we get here, the spin deadline in lck_rw_drain_status()
         * has expired w/o the rw_shared_count having drained to 0
         * check to see if we're allowed to do a thread_block
         */
        if (word.can_sleep) {
            istate = lck_interlock_lock(lock);

            word.data = ordered_load_rw(lock);
            if (word.shared_count != 0) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
                    trace_lck, word.shared_count, 0, 0, 0);

                word.w_waiting = 1;
                ordered_store_rw(lock, word.data);

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
                res = assert_wait(LCK_RW_WRITER_EVENT(lock),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lock, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
                    trace_lck, res, slept, 0, 0);
            } else {
                lck_interlock_unlock(lock, istate);
                break;
            }
        }
    }
#if CONFIG_DTRACE
    /*
     * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
     */
    if (dtrace_ls_enabled == TRUE) {
        if (slept == 0) {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
        } else {
            LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
                mach_absolute_time() - wait_interval, 1,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
#endif
}

/*!
 * @function lck_rw_lock_shared_to_exclusive
 *
 * @abstract
 * Upgrades a rw_lock held in shared mode to exclusive.
 *
 * @discussion
 * This function can block.
 * Only one reader at a time can upgrade to exclusive mode. If the upgrade fails, the function
 * returns with the lock not held.
 * The caller needs to hold the lock in shared mode to upgrade it.
 *
 * @param lock          rw_lock already held in shared mode to upgrade.
 *
 * @returns TRUE if the lock was upgraded, FALSE if it was not possible.
 *          If the function was not able to upgrade the lock, the lock will be dropped
 *          by the function.
 */
boolean_t
lck_rw_lock_shared_to_exclusive(
    lck_rw_t *lock)
{
    uint32_t data, prev;

    assertf(lock->lck_rw_priv_excl != 0, "lock %p thread %p", lock, current_thread());

#if DEBUG_RW
    thread_t thread = current_thread();
    assert_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
#endif /* DEBUG_RW */

    for (;;) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        if (data & LCK_RW_WANT_UPGRADE) {
            data -= LCK_RW_SHARED_READER;
            if ((data & LCK_RW_SHARED_MASK) == 0) {     /* we were the last reader */
                data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
            }
            if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
                return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
            }
        } else {
            data |= LCK_RW_WANT_UPGRADE;        /* ask for WANT_UPGRADE */
            data -= LCK_RW_SHARED_READER;       /* and shed our read count */
            if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
                break;
            }
        }
        cpu_pause();
    }
    /* we now own the WANT_UPGRADE */
    if (data & LCK_RW_SHARED_MASK) {                    /* check to see if all of the readers are drained */
        lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
    }
    __assert_only thread_t owner = ordered_load_rw_owner(lock);
    assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);

    ordered_store_rw_owner(lock, current_thread());
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
#endif /* CONFIG_DTRACE */

#if DEBUG_RW
    change_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, __builtin_return_address(0));
#endif /* DEBUG_RW */
    return TRUE;
}
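
/*
 * Example (illustrative sketch): the canonical upgrade pattern. On failure
 * the lock is dropped by the callee, so the caller must reacquire it and
 * re-validate any state observed under the shared hold. Names are
 * hypothetical.
 *
 *	lck_rw_lock_shared(&obj->lock);
 *	if (needs_update(obj)) {
 *		if (!lck_rw_lock_shared_to_exclusive(&obj->lock)) {
 *			// lock was dropped; reacquire and re-validate
 *			lck_rw_lock_exclusive(&obj->lock);
 *		}
 *		... update obj; now held exclusive ...
 *		lck_rw_unlock_exclusive(&obj->lock);
 *	} else {
 *		lck_rw_unlock_shared(&obj->lock);
 *	}
 */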

/*
 * Routine: lck_rw_lock_exclusive_to_shared_gen
 * Function:
 *	Fast path has already dropped
 *	our exclusive state and bumped lck_rw_shared_count
 *	all we need to do here is determine if anyone
 *	needs to be awakened.
 */
static void
lck_rw_lock_exclusive_to_shared_gen(
    lck_rw_t *lck,
    uint32_t prior_lock_state,
    void *caller)
{
#pragma unused(caller)
    __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
    lck_rw_word_t fake_lck;

    /*
     * prior_lock_state is a snapshot of the 1st word of the
     * lock in question... we'll fake up a copy of it
     * and carefully not access anything beyond what's defined
     * in the first word of a lck_rw_t
     */
    fake_lck.data = prior_lock_state;

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
        trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);

    /*
     * don't wake up anyone waiting to take the lock exclusively
     * since we hold a read count... when the read count drops to 0,
     * the writers will be woken.
     *
     * wake up any waiting readers if we don't have any writers waiting,
     * or the lock is NOT marked as rw_priv_excl (writers have privilege)
     */
    if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
        thread_wakeup(LCK_RW_READER_EVENT(lck));
    }

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
        trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif

#if DEBUG_RW
    thread_t thread = current_thread();
    change_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
#endif /* DEBUG_RW */
}

/*!
 * @function lck_rw_lock_exclusive_to_shared
 *
 * @abstract
 * Downgrades a rw_lock held in exclusive mode to shared.
 *
 * @discussion
 * The caller needs to hold the lock in exclusive mode to be able to downgrade it.
 *
 * @param lock          rw_lock already held in exclusive mode to downgrade.
 */
void
lck_rw_lock_exclusive_to_shared(
    lck_rw_t *lock)
{
    uint32_t data, prev;

    assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
    ordered_store_rw_owner(lock, THREAD_NULL);
    for (;;) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);        /* wait for interlock to clear */
            continue;
        }
        data += LCK_RW_SHARED_READER;
        if (data & LCK_RW_WANT_UPGRADE) {
            data &= ~(LCK_RW_WANT_UPGRADE);
        } else {
            data &= ~(LCK_RW_WANT_EXCL);
        }
        if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
            data &= ~(LCK_RW_W_WAITING);
        }
        if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
            break;
        }
        cpu_pause();
    }
    lck_rw_lock_exclusive_to_shared_gen(lock, prev, __builtin_return_address(0));
}
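
/*
 * Example (illustrative sketch): downgrading after publishing an update so
 * that other readers can make progress while this thread keeps reading.
 * Names are hypothetical.
 *
 *	lck_rw_lock_exclusive(&obj->lock);
 *	publish_update(obj);
 *	lck_rw_lock_exclusive_to_shared(&obj->lock);
 *	... continue reading obj under the shared hold ...
 *	lck_rw_unlock_shared(&obj->lock);
 */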

/*
 * Very sad hack, but the codegen for lck_rw_lock
 * is very unhappy with the combination of __builtin_return_address()
 * and a noreturn function. For some reason it adds more frames
 * than it should. rdar://76570684
 */
void
_lck_rw_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-noreturn"
__attribute__((noinline, weak))
void
_lck_rw_lock_type_panic(
    lck_rw_t *lck,
    lck_rw_type_t lck_rw_type)
{
    panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
}
#pragma clang diagnostic pop
1676
1677 /*!
1678 * @function lck_rw_lock
1679 *
1680 * @abstract
1681 * Locks a rw_lock with the specified type.
1682 *
1683 * @discussion
1684 * See lck_rw_lock_shared() or lck_rw_lock_exclusive() for more details.
1685 *
1686 * @param lck rw_lock to lock.
1687 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
1688 */
1689 void
lck_rw_lock(lck_rw_t * lck,lck_rw_type_t lck_rw_type)1690 lck_rw_lock(
1691 lck_rw_t *lck,
1692 lck_rw_type_t lck_rw_type)
1693 {
1694 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1695 return lck_rw_lock_shared_internal(lck, __builtin_return_address(0));
1696 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1697 return lck_rw_lock_exclusive_internal(lck, __builtin_return_address(0));
1698 }
1699 _lck_rw_lock_type_panic(lck, lck_rw_type);
1700 }
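
/*
 * Example: the type-based entry point suits callers that pick the mode at
 * runtime. Illustrative sketch only; `obj` and want_write are hypothetical.
 *
 *	lck_rw_type_t mode = want_write ? LCK_RW_TYPE_EXCLUSIVE : LCK_RW_TYPE_SHARED;
 *	lck_rw_lock(&obj->lock, mode);
 *	...
 *	lck_rw_unlock(&obj->lock, mode);
 */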

__attribute__((always_inline))
static boolean_t
lck_rw_try_lock_shared_internal_inline(
	lck_rw_t *lock,
	void *caller)
{
#pragma unused(caller)

	uint32_t data, prev;
	thread_t thread = current_thread();
#ifdef DEBUG_RW
	boolean_t check_canlock = TRUE;
#endif

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
			atomic_exchange_abort();
			return FALSE;	/* lock is busy */
		}
#ifdef DEBUG_RW
		if ((data & LCK_RW_SHARED_MASK) == 0) {
			/*
			 * If the lock is uncontended,
			 * we do not need to check if we can lock it
			 */
			check_canlock = FALSE;
		}
#endif
		data += LCK_RW_SHARED_READER;	/* Increment reader refcount */
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}
		cpu_pause();
	}
#ifdef DEBUG_RW
	if (check_canlock) {
		/*
		 * Best effort attempt to check that this thread
		 * is not already holding the lock (this checks read mode too).
		 */
		assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
	}
#endif
	__assert_only thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);

	if (lock->lck_rw_can_sleep) {
		lck_rw_inc_thread_count(thread);
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */

#ifdef DEBUG_RW
	add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
#endif /* DEBUG_RW */
	return TRUE;
}

__attribute__((noinline))
static boolean_t
lck_rw_try_lock_shared_internal(
	lck_rw_t *lock,
	void *caller)
{
	return lck_rw_try_lock_shared_internal_inline(lock, caller);
}

/*!
 * @function lck_rw_try_lock_shared
 *
 * @abstract
 * Tries to lock a rw_lock in read mode.
 *
 * @discussion
 * This function will return and not block in case the lock is already held.
 * See lck_rw_lock_shared for more details.
 *
 * @param lock rw_lock to lock.
 *
 * @returns TRUE if the lock is successfully acquired,
 *          FALSE if a writer holds the lock or a writer/upgrade request is pending.
 */
boolean_t
lck_rw_try_lock_shared(
	lck_rw_t *lock)
{
	return lck_rw_try_lock_shared_internal_inline(lock, __builtin_return_address(0));
}
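
/*
 * Example: probe the read side and fall back to other work rather than
 * blocking behind a writer. Illustrative sketch only; `cache`, `key` and
 * cache_lookup_locked() are hypothetical.
 *
 *	if (lck_rw_try_lock_shared(&cache->lock)) {
 *		entry = cache_lookup_locked(cache, key);
 *		lck_rw_unlock_shared(&cache->lock);
 *	} else {
 *		entry = NULL;	// writer active; caller takes the slow path
 *	}
 */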

__attribute__((always_inline))
static boolean_t
lck_rw_try_lock_exclusive_internal_inline(
	lck_rw_t *lock,
	void *caller)
{
#pragma unused(caller)
	uint32_t data, prev;

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
			atomic_exchange_abort();
			return FALSE;
		}
		data |= LCK_RW_WANT_EXCL;
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}
		cpu_pause();
	}
	thread_t thread = current_thread();

	if (lock->lck_rw_can_sleep) {
		lck_rw_inc_thread_count(thread);
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}

	__assert_only thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);

	ordered_store_rw_owner(lock, thread);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */

#ifdef DEBUG_RW
	add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
#endif /* DEBUG_RW */
	return TRUE;
}

__attribute__((noinline))
static boolean_t
lck_rw_try_lock_exclusive_internal(
	lck_rw_t *lock,
	void *caller)
{
	return lck_rw_try_lock_exclusive_internal_inline(lock, caller);
}

/*!
 * @function lck_rw_try_lock_exclusive
 *
 * @abstract
 * Tries to lock a rw_lock in write mode.
 *
 * @discussion
 * This function will return and not block in case the lock is already held.
 * See lck_rw_lock_exclusive for more details.
 *
 * @param lock rw_lock to lock.
 *
 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
 */
boolean_t
lck_rw_try_lock_exclusive(
	lck_rw_t *lock)
{
	return lck_rw_try_lock_exclusive_internal_inline(lock, __builtin_return_address(0));
}
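
/*
 * Example: opportunistically take the write side, falling back to a
 * blocking shared acquisition when the lock is contended. Illustrative
 * sketch only; `obj` and obj_reap_locked() are hypothetical.
 *
 *	if (lck_rw_try_lock_exclusive(&obj->lock)) {
 *		obj_reap_locked(obj);		// got the write side for free
 *		lck_rw_unlock_exclusive(&obj->lock);
 *	} else {
 *		lck_rw_lock_shared(&obj->lock);	// contended: just read for now
 *		...
 *		lck_rw_unlock_shared(&obj->lock);
 *	}
 */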

/*
 * Very sad hack, but the codegen for lck_rw_try_lock
 * is very unhappy with the combination of __builtin_return_address()
 * and a noreturn function. For some reason it adds more frames
 * than it should. rdar://76570684
 */
boolean_t
_lck_rw_try_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-noreturn"
__attribute__((noinline, weak))
boolean_t
_lck_rw_try_lock_type_panic(
	lck_rw_t *lck,
	lck_rw_type_t lck_rw_type)
{
	panic("lck_rw_try_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
}
#pragma clang diagnostic pop

/*!
 * @function lck_rw_try_lock
 *
 * @abstract
 * Tries to lock a rw_lock with the specified type.
 *
 * @discussion
 * This function will return and not wait/block in case the lock is already held.
 * See lck_rw_try_lock_shared() or lck_rw_try_lock_exclusive() for more details.
 *
 * @param lck rw_lock to lock.
 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
 *
 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
 */
boolean_t
lck_rw_try_lock(
	lck_rw_t *lck,
	lck_rw_type_t lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED) {
		return lck_rw_try_lock_shared_internal(lck, __builtin_return_address(0));
	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
		return lck_rw_try_lock_exclusive_internal(lck, __builtin_return_address(0));
	}
	return _lck_rw_try_lock_type_panic(lck, lck_rw_type);
}

/*
 * Routine: lck_rw_done_gen
 *
 *	prior_lock_state is the value in the 1st
 *	word of the lock at the time of a successful
 *	atomic compare and exchange with the new value...
 *	it represents the state of the lock before we
 *	decremented the rw_shared_count or cleared either
 *	rw_want_upgrade or rw_want_excl and
 *	the lck_x_waiting bits... since the wrapper
 *	routine has already changed the state atomically,
 *	we just need to decide if we should
 *	wake up anyone and what value to return... we do
 *	this by examining the state of the lock before
 *	we changed it
 */
static lck_rw_type_t
lck_rw_done_gen(
	lck_rw_t *lck,
	uint32_t prior_lock_state)
{
	lck_rw_word_t fake_lck;
	lck_rw_type_t lock_type;
	thread_t thread;
	uint32_t rwlock_count;

	/*
	 * prior_lock_state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a lck_rw_word_t from it
	 * and carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck.data = prior_lock_state;

	if (fake_lck.shared_count <= 1) {
		if (fake_lck.w_waiting) {
			thread_wakeup(LCK_RW_WRITER_EVENT(lck));
		}

		if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
			thread_wakeup(LCK_RW_READER_EVENT(lck));
		}
	}
	if (fake_lck.shared_count) {
		lock_type = LCK_RW_TYPE_SHARED;
	} else {
		lock_type = LCK_RW_TYPE_EXCLUSIVE;
	}

	/* Check if dropping the lock means that we need to unpromote */
	thread = current_thread();
	if (fake_lck.can_sleep) {
		rwlock_count = thread->rwlock_count--;
	} else {
		rwlock_count = UINT32_MAX;
	}

	if (rwlock_count == 0) {
		panic("rw lock count underflow for thread %p", thread);
	}

	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags checked without lock, but will be rechecked while clearing */
		lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
	}
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
#endif

#ifdef DEBUG_RW
	remove_held_rwlock(lck, thread, lock_type);
#endif /* DEBUG_RW */
	return lock_type;
}

/*!
 * @function lck_rw_done
 *
 * @abstract
 * Force-unlocks a rw_lock whichever way it is held, without consistency checks.
 *
 * @discussion
 * Do not use unless you are sure you can forgo the consistency checks
 * performed by lck_rw_unlock_shared() and lck_rw_unlock_exclusive().
 *
 * @param lock rw_lock to unlock.
 *
 * @returns the mode (LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE) that was released.
 */
lck_rw_type_t
lck_rw_done(
	lck_rw_t *lock)
{
	uint32_t data, prev;
	boolean_t once = FALSE;

#ifdef DEBUG_RW
	/*
	 * Best effort attempt to check that this thread
	 * is holding the lock.
	 */
	thread_t thread = current_thread();
	assert_held_rwlock(lock, thread, 0);
#endif /* DEBUG_RW */
	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
		if (data & LCK_RW_INTERLOCK) {	/* wait for interlock to clear */
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		if (data & LCK_RW_SHARED_MASK) {	/* lock is held shared */
			assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
			data -= LCK_RW_SHARED_READER;
			if ((data & LCK_RW_SHARED_MASK) == 0) {	/* if reader count has now gone to 0, check for waiters */
				goto check_waiters;
			}
		} else {	/* if reader count == 0, must be exclusive lock */
			if (data & LCK_RW_WANT_UPGRADE) {
				data &= ~(LCK_RW_WANT_UPGRADE);
			} else {
				if (data & LCK_RW_WANT_EXCL) {
					data &= ~(LCK_RW_WANT_EXCL);
				} else {	/* lock is not 'owned', panic */
					panic("Releasing non-exclusive RW lock without a reader refcount!");
				}
			}
			if (!once) {
				// Only check for holder and clear it once
				assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
				ordered_store_rw_owner(lock, THREAD_NULL);
				once = TRUE;
			}
check_waiters:
			/*
			 * test the original values to match what
			 * lck_rw_done_gen is going to do to determine
			 * which wakeups need to happen...
			 *
			 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
			 */
			if (prev & LCK_RW_W_WAITING) {
				data &= ~(LCK_RW_W_WAITING);
				if ((prev & LCK_RW_PRIV_EXCL) == 0) {
					data &= ~(LCK_RW_R_WAITING);
				}
			} else {
				data &= ~(LCK_RW_R_WAITING);
			}
		}
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
			break;
		}
		cpu_pause();
	}
	return lck_rw_done_gen(lock, prev);
}
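
/*
 * Example: because lck_rw_done() derives the held mode from the lock word,
 * a caller that may hold the lock either way can release it without
 * remembering which mode it took. Illustrative sketch only; `obj` is
 * hypothetical.
 *
 *	lck_rw_type_t mode = lck_rw_done(&obj->lock);
 *	if (mode == LCK_RW_TYPE_EXCLUSIVE) {
 *		...				// we were the writer
 *	}
 */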

/*!
 * @function lck_rw_unlock_shared
 *
 * @abstract
 * Unlocks a rw_lock previously locked in shared mode.
 *
 * @discussion
 * The same thread that locked the lock needs to unlock it.
 *
 * @param lck rw_lock held in shared mode to unlock.
 */
void
lck_rw_unlock_shared(
	lck_rw_t *lck)
{
	lck_rw_type_t ret;

	assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
	assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
	ret = lck_rw_done(lck);

	if (ret != LCK_RW_TYPE_SHARED) {
		panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
	}
}

/*!
 * @function lck_rw_unlock_exclusive
 *
 * @abstract
 * Unlocks a rw_lock previously locked in exclusive mode.
 *
 * @discussion
 * The same thread that locked the lock needs to unlock it.
 *
 * @param lck rw_lock held in exclusive mode to unlock.
 */
void
lck_rw_unlock_exclusive(
	lck_rw_t *lck)
{
	lck_rw_type_t ret;

	assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
	ret = lck_rw_done(lck);

	if (ret != LCK_RW_TYPE_EXCLUSIVE) {
		panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
	}
}

/*!
 * @function lck_rw_unlock
 *
 * @abstract
 * Unlocks a rw_lock previously locked with lck_rw_type.
 *
 * @discussion
 * The lock must be unlocked by the same thread it was locked from.
 * The lock and unlock types have to match, unless an upgrade/downgrade was performed while
 * holding the lock.
 *
 * @param lck rw_lock to unlock.
 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
 */
void
lck_rw_unlock(
	lck_rw_t *lck,
	lck_rw_type_t lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED) {
		lck_rw_unlock_shared(lck);
	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
		lck_rw_unlock_exclusive(lck);
	} else {
		panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
	}
}

/*!
 * @function lck_rw_assert
 *
 * @abstract
 * Asserts that the rw_lock is held.
 *
 * @discussion
 * Read-write locks do not have a concept of ownership when held in shared mode,
 * so this function merely asserts that someone is holding the lock, not necessarily the caller.
 * However, if rw_lock_debug is on, a best-effort mechanism to track the owners is in place, and
 * this function can be more accurate.
 * Type can be LCK_RW_ASSERT_SHARED, LCK_RW_ASSERT_EXCLUSIVE, LCK_RW_ASSERT_HELD or
 * LCK_RW_ASSERT_NOTHELD.
 *
 * @param lck rw_lock to check.
 * @param type assert type
 */
void
lck_rw_assert(
	lck_rw_t *lck,
	unsigned int type)
{
#if DEBUG_RW
	thread_t thread = current_thread();
#endif /* DEBUG_RW */

	switch (type) {
	case LCK_RW_ASSERT_SHARED:
		if ((lck->lck_rw_shared_count != 0) &&
		    (lck->lck_rw_owner == THREAD_NULL)) {
#if DEBUG_RW
			assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
#endif /* DEBUG_RW */
			return;
		}
		break;
	case LCK_RW_ASSERT_EXCLUSIVE:
		if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
		    (lck->lck_rw_shared_count == 0) &&
		    (lck->lck_rw_owner == current_thread())) {
#if DEBUG_RW
			assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
#endif /* DEBUG_RW */
			return;
		}
		break;
	case LCK_RW_ASSERT_HELD:
		if (lck->lck_rw_shared_count != 0) {
#if DEBUG_RW
			assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
#endif /* DEBUG_RW */
			return;	// Held shared
		}
		if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
		    (lck->lck_rw_owner == current_thread())) {
#if DEBUG_RW
			assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
#endif /* DEBUG_RW */
			return;	// Held exclusive
		}
		break;
	case LCK_RW_ASSERT_NOTHELD:
		if ((lck->lck_rw_shared_count == 0) &&
		    !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
		    (lck->lck_rw_owner == THREAD_NULL)) {
#ifdef DEBUG_RW
			assert_canlock_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
#endif /* DEBUG_RW */
			return;
		}
		break;
	default:
		break;
	}
	panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
}
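
/*
 * Example: functions that require their caller to already hold a lock
 * conventionally open with an assertion instead of taking the lock
 * themselves. Illustrative sketch only; table_t and table_update_locked()
 * are hypothetical.
 *
 *	static void
 *	table_update_locked(table_t *table)
 *	{
 *		lck_rw_assert(&table->lock, LCK_RW_ASSERT_EXCLUSIVE);
 *		...
 *	}
 */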

/*!
 * @function kdp_lck_rw_lock_is_acquired_exclusive
 *
 * @abstract
 * Checks if a rw_lock is held exclusively.
 *
 * @discussion
 * NOT SAFE: To be used only by the kernel debugger to avoid deadlock.
 *
 * @param lck lock to check
 *
 * @returns TRUE if the lock is held exclusively
 */
boolean_t
kdp_lck_rw_lock_is_acquired_exclusive(
	lck_rw_t *lck)
{
	if (not_in_kdp) {
		panic("rw lock exclusive check done outside of kernel debugger");
	}
	return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
}

void
kdp_rwlck_find_owner(
	__unused struct waitq *waitq,
	event64_t event,
	thread_waitinfo_t *waitinfo)
{
	lck_rw_t *rwlck = NULL;
	switch (waitinfo->wait_type) {
	case kThreadWaitKernelRWLockRead:
		rwlck = READ_EVENT_TO_RWLOCK(event);
		break;
	case kThreadWaitKernelRWLockWrite:
	case kThreadWaitKernelRWLockUpgrade:
		rwlck = WRITE_EVENT_TO_RWLOCK(event);
		break;
	default:
		panic("%s was called with an invalid blocking type", __FUNCTION__);
		break;
	}
	if (rwlck->lck_rw_owner) {
		thread_require(rwlck->lck_rw_owner);
	}
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
	waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
}

/*!
 * @function lck_rw_lock_yield_shared
 *
 * @abstract
 * Yields a rw_lock held in shared mode.
 *
 * @discussion
 * This function can block.
 * Yields the lock in case there are writers waiting.
 * The yield will unlock, block, and re-lock the lock in shared mode.
 *
 * @param lck rw_lock already held in shared mode to yield.
 * @param force_yield if TRUE, always yield irrespective of the lock status.
 *
 * @returns TRUE if the lock was yielded, FALSE otherwise
 */
boolean_t
lck_rw_lock_yield_shared(
	lck_rw_t *lck,
	boolean_t force_yield)
{
	lck_rw_word_t word;

	lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);

	word.data = ordered_load_rw(lck);
	if (word.want_excl || word.want_upgrade || force_yield) {
		lck_rw_unlock_shared(lck);
		mutex_pause(2);
		lck_rw_lock_shared(lck);
		return TRUE;
	}

	return FALSE;
}
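
/*
 * Example: a long shared-mode scan can periodically offer the lock to
 * queued writers; a TRUE return means the lock was dropped and re-taken,
 * so any state read under the previous hold must be revalidated.
 * Illustrative sketch only; `list` and its helpers are hypothetical.
 *
 *	lck_rw_lock_shared(&list->lock);
 *	while (more_work(list)) {
 *		process_one_locked(list);
 *		if (lck_rw_lock_yield_shared(&list->lock, FALSE)) {
 *			revalidate_cursor(list);	// lock was dropped and re-taken
 *		}
 *	}
 *	lck_rw_unlock_shared(&list->lock);
 */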

/*!
 * @function lck_rw_sleep
 *
 * @abstract
 * Assert_wait on an event while holding the rw_lock.
 *
 * @discussion
 * The flags decide how to re-acquire the lock upon wakeup
 * (LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
 * and whether the priority needs to be kept boosted until the lock is
 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
 *
 * @param lck rw_lock to use to synch the assert_wait.
 * @param lck_sleep_action flags.
 * @param event event to assert_wait on.
 * @param interruptible wait type.
 */
wait_result_t
lck_rw_sleep(
	lck_rw_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible)
{
	wait_result_t res;
	lck_rw_type_t lck_rw_type;
	thread_pri_floor_t token;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * Although we are dropping the RW lock, the intent in most cases
		 * is that this thread remains as an observer, since it may hold
		 * some secondary resource, but must yield to avoid deadlock. In
		 * this situation, make sure that the thread is boosted to the
		 * ceiling while blocked, so that it can re-acquire the
		 * RW lock at that priority.
		 */
		token = thread_priority_floor_start();
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
				lck_rw_lock(lck, lck_rw_type);
			} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
				lck_rw_lock_exclusive(lck);
			} else {
				lck_rw_lock_shared(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		(void)lck_rw_done(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread_priority_floor_end(&token);
	}

	return res;
}
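
/*
 * Example: the classic condition-wait loop; the lock is dropped while the
 * thread is blocked, so the predicate must be re-checked after every
 * wakeup. Illustrative sketch only; `q` and its fields are hypothetical.
 *
 *	lck_rw_lock_exclusive(&q->lock);
 *	while (q->empty) {
 *		(void) lck_rw_sleep(&q->lock, LCK_SLEEP_EXCLUSIVE,
 *		    (event_t)&q->empty, THREAD_UNINT);
 *	}
 *	...
 *	lck_rw_unlock_exclusive(&q->lock);
 */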

/*!
 * @function lck_rw_sleep_deadline
 *
 * @abstract
 * Assert_wait_deadline on an event while holding the rw_lock.
 *
 * @discussion
 * The flags decide how to re-acquire the lock upon wakeup
 * (LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
 * and whether the priority needs to be kept boosted until the lock is
 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
 *
 * @param lck rw_lock to use to synch the assert_wait.
 * @param lck_sleep_action flags.
 * @param event event to assert_wait on.
 * @param interruptible wait type.
 * @param deadline time after which the thread is woken up even if the event
 *        has not happened.
 */
wait_result_t
lck_rw_sleep_deadline(
	lck_rw_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible,
	uint64_t deadline)
{
	wait_result_t res;
	lck_rw_type_t lck_rw_type;
	thread_pri_floor_t token;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		token = thread_priority_floor_start();
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
				lck_rw_lock(lck, lck_rw_type);
			} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
				lck_rw_lock_exclusive(lck);
			} else {
				lck_rw_lock_shared(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		(void)lck_rw_done(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread_priority_floor_end(&token);
	}

	return res;
}

/*
 * Reader-writer lock promotion
 *
 * We support a limited form of reader-writer
 * lock promotion whose effects are:
 *
 *   * Qualifying threads have decay disabled
 *   * Scheduler priority is reset to a floor of
 *     their statically assigned priority
 *     or MINPRI_RWLOCK
 *
 * The rationale is that lck_rw_ts do not have
 * a single owner, so we cannot apply a directed
 * priority boost from all waiting threads
 * to all holding threads without maintaining
 * lists of all shared owners and all waiting
 * threads for every lock.
 *
 * Instead (and to preserve the uncontended fast-
 * path), acquiring (or attempting to acquire)
 * a RW lock in shared or exclusive mode increments
 * a per-thread counter. Only if that thread stops
 * making forward progress (for instance blocking
 * on a mutex, or being preempted) do we consult
 * the counter and apply the priority floor.
 * When the thread becomes runnable again (or in
 * the case of preemption it never stopped being
 * runnable), it has the priority boost and should
 * be in a good position to run on the CPU and
 * release all RW locks (at which point the priority
 * boost is cleared).
 *
 * Care must be taken to ensure that priority
 * boosts are not retained indefinitely, since unlike
 * mutex priority boosts (where the boost is tied
 * to the mutex lifecycle), the boost is tied
 * to the thread and independent of any particular
 * lck_rw_t. Assertions are in place on return
 * to userspace so that the boost is not held
 * indefinitely.
 *
 * The routines that increment/decrement the
 * per-thread counter should err on the side of
 * incrementing any time a preemption is possible
 * and the lock would be visible to the rest of the
 * system as held (so it should be incremented before
 * interlocks are dropped/preemption is enabled, or
 * before a CAS is executed to acquire the lock).
 */
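
/*
 * A minimal sketch of the rule stated in the last paragraph, on a
 * hypothetical acquisition path (in this file the counter is maintained
 * via lck_rw_inc_thread_count() and thread->rwlock_count):
 *
 *	thread->rwlock_count++;		// count first, so the floor applies
 *	if (!try_acquire(lock)) {	// if we are preempted while the lock
 *		thread->rwlock_count--;	// could appear held to others
 *	}
 */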

/*!
 * @function lck_rw_clear_promotion
 *
 * @abstract
 * Undo priority promotions when the last rw_lock
 * is released by a thread (if a promotion was active).
 *
 * @param thread thread to demote.
 * @param trace_obj object reason for the demotion.
 */
void
lck_rw_clear_promotion(
	thread_t thread,
	uintptr_t trace_obj)
{
	assert(thread->rwlock_count == 0);

	/* Cancel any promotions if the thread had actually blocked while holding a RW lock */
	spl_t s = splsched();
	thread_lock(thread);

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED, trace_obj);
	}

	thread_unlock(thread);
	splx(s);
}

/*!
 * @function lck_rw_set_promotion_locked
 *
 * @abstract
 * Callout from context switch if the thread goes
 * off core with a positive rwlock_count.
 *
 * @discussion
 * Called at splsched with the thread locked.
 *
 * @param thread thread to promote.
 */
void
lck_rw_set_promotion_locked(thread_t thread)
{
	if (LcksOpts & disLkRWPrio) {
		return;
	}

	assert(thread->rwlock_count > 0);

	if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
	}
}

#if __x86_64__
void lck_rw_clear_promotions_x86(thread_t thread);
/*
 * On return to userspace, this routine is called from assembly
 * if the rwlock_count is somehow imbalanced.
 */
#if MACH_LDEBUG
__dead2
#endif /* MACH_LDEBUG */
void
lck_rw_clear_promotions_x86(thread_t thread)
{
#if MACH_LDEBUG
	/* It's fatal to leave a RW lock locked and return to userspace */
	panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread);
#else
	/* Paper over the issue */
	thread->rwlock_count = 0;
	lck_rw_clear_promotion(thread, 0);
#endif /* MACH_LDEBUG */
}
#endif /* __x86_64__ */