1 /*
2 * Copyright (c) 2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 #define LOCK_PRIVATE 1
57 #include <debug.h>
58 #include <kern/locks_internal.h>
59 #include <kern/lock_stat.h>
60 #include <kern/locks.h>
61 #include <kern/zalloc.h>
62 #include <kern/thread.h>
63 #include <kern/processor.h>
64 #include <kern/sched_prim.h>
65 #include <kern/debug.h>
66 #include <machine/atomic.h>
67 #include <machine/machine_cpu.h>
68
69 KALLOC_TYPE_DEFINE(KT_LCK_RW, lck_rw_t, KT_PRIV_ACCT);
70
71 #define LCK_RW_WRITER_EVENT(lck) (event_t)((uintptr_t)(lck)+1)
72 #define LCK_RW_READER_EVENT(lck) (event_t)((uintptr_t)(lck)+2)
73 #define WRITE_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-1))
74 #define READ_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-2))
75
76 #if CONFIG_DTRACE
77 #define DTRACE_RW_SHARED 0x0 //reader
78 #define DTRACE_RW_EXCL 0x1 //writer
79 #define DTRACE_NO_FLAG 0x0 //not applicable
80 #endif /* CONFIG_DTRACE */
81
82 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
83 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
84 #define LCK_RW_LCK_SHARED_CODE 0x102
85 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
86 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
87 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
88
89 #if __x86_64__
90 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
91 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
92 #define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
93 #define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
94 #define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
95 #define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
96 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
97 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
98 #endif
99
100 #define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
101 #define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
102
103 #define ordered_load_rw(lock) os_atomic_load(&(lock)->lck_rw_data, compiler_acq_rel)
104 #define ordered_store_rw(lock, value) os_atomic_store(&(lock)->lck_rw_data, (value), compiler_acq_rel)
105 #define ordered_store_rw_owner(lock, value) os_atomic_store(&(lock)->lck_rw_owner, (value), compiler_acq_rel)
106
107 #ifdef DEBUG_RW
108 static TUNABLE(bool, lck_rw_recursive_shared_assert_74048094, "lck_rw_recursive_shared_assert", false);
109 SECURITY_READ_ONLY_EARLY(vm_packing_params_t) rwlde_caller_packing_params =
110 VM_PACKING_PARAMS(LCK_RW_CALLER_PACKED);
111 #define rw_lock_debug_disabled() ((LcksOpts & disLkRWDebug) == disLkRWDebug)
112
113 #define set_rwlde_caller_packed(entry, caller) ((entry)->rwlde_caller_packed = VM_PACK_POINTER((vm_offset_t)caller, LCK_RW_CALLER_PACKED))
114 #define get_rwlde_caller(entry) ((void*)VM_UNPACK_POINTER(entry->rwlde_caller_packed, LCK_RW_CALLER_PACKED))
115
116 #endif /* DEBUG_RW */
117
118 /*!
119 * @function lck_rw_alloc_init
120 *
121 * @abstract
122 * Allocates and initializes a rw_lock_t.
123 *
124 * @discussion
125 * The function can block. See lck_rw_init() for initialization details.
126 *
127 * @param grp lock group to associate with the lock.
128 * @param attr lock attribute to initialize the lock.
129 *
130 * @returns NULL or the allocated lock
131 */
132 lck_rw_t *
133 lck_rw_alloc_init(
134 lck_grp_t *grp,
135 lck_attr_t *attr)
136 {
137 lck_rw_t *lck;
138
139 lck = zalloc_flags(KT_LCK_RW, Z_WAITOK | Z_ZERO);
140 lck_rw_init(lck, grp, attr);
141 return lck;
142 }
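
/*
 * Example (illustrative sketch, not part of this file): the typical
 * lifetime of a dynamically allocated rw_lock. "my_grp" is a hypothetical
 * caller-owned lock group.
 *
 *	lck_rw_t *rwl = lck_rw_alloc_init(my_grp, LCK_ATTR_NULL);
 *
 *	lck_rw_lock_exclusive(rwl);
 *	... exclusive critical section ...
 *	lck_rw_unlock_exclusive(rwl);
 *
 *	lck_rw_free(rwl, my_grp);
 */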
143
144 /*!
145 * @function lck_rw_init
146 *
147 * @abstract
148 * Initializes a rw_lock_t.
149 *
150 * @discussion
151 * Usage statistics for the lock are going to be added to the lock group provided.
152 *
153 * The lock attribute can be used to specify the lock contention behaviour.
154 * RW_WRITER_PRIORITY is the default behaviour (LCK_ATTR_NULL defaults to RW_WRITER_PRIORITY)
155 * and lck_attr_rw_shared_priority() can be used to set the behaviour to RW_SHARED_PRIORITY.
156 *
157 * RW_WRITER_PRIORITY gives priority to the writers upon contention with the readers;
158 * if the lock is held and a writer starts waiting for the lock, readers will not be able
159 * to acquire the lock until all writers stop contending. Readers could
160 * potentially starve.
161 * RW_SHARED_PRIORITY gives priority to the readers upon contention with the writers:
162  * unless the lock is held in exclusive mode, readers will always be able to acquire the lock.
163 * Readers can lock a shared lock even if there are writers waiting. Writers could potentially
164 * starve.
165 *
166 * @param lck lock to initialize.
167 * @param grp lock group to associate with the lock.
168 * @param attr lock attribute to initialize the lock.
169 *
170 */
171 void
172 lck_rw_init(
173 lck_rw_t *lck,
174 lck_grp_t *grp,
175 lck_attr_t *attr)
176 {
177 /* keep this so that the lck_type_t type is referenced for lldb */
178 lck_type_t type = LCK_TYPE_RW;
179
180 if (attr == LCK_ATTR_NULL) {
181 attr = &lck_attr_default;
182 }
183 *lck = (lck_rw_t){
184 .lck_rw_type = type,
185 .lck_rw_can_sleep = true,
186 .lck_rw_priv_excl = !(attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY),
187 };
188 lck_grp_reference(grp, &grp->lck_grp_rwcnt);
189 }
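
/*
 * Illustrative sketch (the structure and group below are hypothetical,
 * caller-owned): initializing an embedded lock with reader priority
 * instead of the default writer priority.
 *
 *	lck_grp_t  *grp  = lck_grp_alloc_init("my_subsys", LCK_GRP_ATTR_NULL);
 *	lck_attr_t *attr = lck_attr_alloc_init();
 *	lck_attr_rw_shared_priority(attr);      // RW_SHARED_PRIORITY behaviour
 *
 *	lck_rw_init(&my_obj->mo_rwlock, grp, attr);
 */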
190
191 /*!
192 * @function lck_rw_free
193 *
194 * @abstract
195 * Frees a rw_lock previously allocated with lck_rw_alloc_init().
196 *
197 * @discussion
198  * The lock must not be held by any thread.
199 *
200 * @param lck rw_lock to free.
201 */
202 void
203 lck_rw_free(
204 lck_rw_t *lck,
205 lck_grp_t *grp)
206 {
207 lck_rw_destroy(lck, grp);
208 zfree(KT_LCK_RW, lck);
209 }
210
211 /*!
212 * @function lck_rw_destroy
213 *
214 * @abstract
215 * Destroys a rw_lock previously initialized with lck_rw_init().
216 *
217 * @discussion
218  * The lock must not be held by any thread.
219 *
220 * @param lck rw_lock to destroy.
221 */
222 void
223 lck_rw_destroy(
224 lck_rw_t *lck,
225 lck_grp_t *grp)
226 {
227 if (lck->lck_rw_type != LCK_TYPE_RW ||
228 lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
229 panic("Destroying previously destroyed lock %p", lck);
230 }
231 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
232
233 lck->lck_rw_type = LCK_TYPE_NONE;
234 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
235 lck_grp_deallocate(grp, &grp->lck_grp_rwcnt);
236 }
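
/*
 * Note (sketch): a lock embedded in another structure and set up with
 * lck_rw_init() is torn down with lck_rw_destroy(); lck_rw_free() is only
 * for locks obtained from lck_rw_alloc_init(), since it also frees the
 * memory.
 *
 *	lck_rw_destroy(&my_obj->mo_rwlock, grp);
 */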
237
238 #ifdef DEBUG_RW
239
240 /*
241 * Best effort mechanism to debug rw_locks.
242 *
243 * This mechanism is in addition to the owner checks. The owner is set
244 * only when the lock is held in exclusive mode so the checks do not cover
245 * the cases in which the lock is held in shared mode.
246 *
247 * This mechanism tentatively stores the rw_lock acquired and its debug
248 * information on the thread struct.
249  * At most LCK_RW_EXPECTED_MAX_NUMBER entries of rw_lock debug information can be stored.
250 *
251 * NOTE: LCK_RW_EXPECTED_MAX_NUMBER is the expected number of rw_locks held
252 * at the same time. If a thread holds more than this number of rw_locks we
253 * will start losing debug information.
254 * Increasing LCK_RW_EXPECTED_MAX_NUMBER will increase the probability we will
255 * store the debug information but it will require more memory per thread
256 * and longer lock/unlock time.
257 *
258  * If an empty slot is found for the debug information, we record the lock;
259  * otherwise we set the overflow flag.
260  *
261  * Once the overflow flag is set we might stop asserting, because we can no longer
262  * be sure whether the lock was acquired or not.
263 *
264 * Even if we reached the overflow threshold, we try to store the debug information
265 * for the new locks acquired. This can be useful in core dumps to debug
266 * possible return to userspace without unlocking and to find possible readers
267 * holding the lock.
268 */
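/*
 * Worked example of the bookkeeping below (illustrative): a thread that
 * takes lock A in shared mode twice and lock B in exclusive mode ends up
 * with rwld_locks_acquired == 3 and rwld_locks_saved == 2, the entry for A
 * holding rwlde_mode_count == 2 (recursive shared) and the entry for B
 * holding rwlde_mode_count == -1 (exclusive). Unlocking reverses the
 * counts and frees a slot once its mode_count returns to 0.
 */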
269 __startup_func
270 static void
271 rw_lock_init(void)
272 {
273 if (kern_feature_override(KF_RW_LOCK_DEBUG_OVRD)) {
274 LcksOpts |= disLkRWDebug;
275 }
276 }
277 STARTUP(LOCKS, STARTUP_RANK_FIRST, rw_lock_init);
278
279 static inline struct rw_lock_debug_entry *
280 find_lock_in_savedlocks(lck_rw_t* lock, rw_lock_debug_t *rw_locks_held)
281 {
282 int i;
283 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
284 struct rw_lock_debug_entry *existing = &rw_locks_held->rwld_locks[i];
285 if (existing->rwlde_lock == lock) {
286 return existing;
287 }
288 }
289
290 return NULL;
291 }
292
293 __abortlike
294 static void
295 rwlock_slot_panic(rw_lock_debug_t *rw_locks_held)
296 {
297 panic("No empty slot found in %p slot_used %d", rw_locks_held, rw_locks_held->rwld_locks_saved);
298 }
299
300 static inline struct rw_lock_debug_entry *
301 find_empty_slot(rw_lock_debug_t *rw_locks_held)
302 {
303 int i;
304 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
305 struct rw_lock_debug_entry *entry = &rw_locks_held->rwld_locks[i];
306 if (entry->rwlde_lock == NULL) {
307 return entry;
308 }
309 }
310 rwlock_slot_panic(rw_locks_held);
311 }
312
313 __abortlike
314 static void
315 canlock_rwlock_panic(lck_rw_t* lock, thread_t thread, struct rw_lock_debug_entry *entry)
316 {
317 panic("RW lock %p already held by %p caller %p mode_count %d state 0x%x owner 0x%p ",
318 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
319 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
320 }
321
322 static inline void
323 assert_canlock_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
324 {
325 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
326
327 if (__probable(rw_lock_debug_disabled() || (rw_locks_held->rwld_locks_acquired == 0))) {
328 //no locks saved, safe to lock
329 return;
330 }
331
332 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
333 if (__improbable(entry != NULL)) {
334 boolean_t can_be_shared_recursive;
335 if (lck_rw_recursive_shared_assert_74048094) {
336 can_be_shared_recursive = (lock->lck_rw_priv_excl == 0);
337 } else {
338 			/* lck_rw_lock_shared is currently called recursively;
339 			 * until that code is fixed, allow locking
340 			 * recursively in shared mode
341 			 */
342 can_be_shared_recursive = TRUE;
343 }
344 if ((type == LCK_RW_TYPE_SHARED) && can_be_shared_recursive && entry->rwlde_mode_count >= 1) {
345 return;
346 }
347 canlock_rwlock_panic(lock, thread, entry);
348 }
349 }
350
351 __abortlike
352 static void
353 held_rwlock_notheld_panic(lck_rw_t* lock, thread_t thread)
354 {
355 panic("RW lock %p not held by %p", lock, thread);
356 }
357
358 __abortlike
359 static void
360 held_rwlock_notheld_with_info_panic(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, struct rw_lock_debug_entry *entry)
361 {
362 if (type == LCK_RW_TYPE_EXCLUSIVE) {
363 panic("RW lock %p not held in exclusive by %p caller %p read %d state 0x%x owner 0x%p ",
364 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
365 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
366 } else {
367 panic("RW lock %p not held in shared by %p caller %p read %d state 0x%x owner 0x%p ",
368 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
369 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
370 }
371 }
372
373 static inline void
374 assert_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
375 {
376 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
377
378 if (__probable(rw_lock_debug_disabled())) {
379 return;
380 }
381
382 if (__improbable(rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_locks_saved == 0)) {
383 if (rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_overflow == 0) {
384 held_rwlock_notheld_panic(lock, thread);
385 }
386 return;
387 }
388
389 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
390 if (__probable(entry != NULL)) {
391 if (type == LCK_RW_TYPE_EXCLUSIVE && entry->rwlde_mode_count != -1) {
392 held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
393 } else {
394 if (type == LCK_RW_TYPE_SHARED && entry->rwlde_mode_count <= 0) {
395 held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
396 }
397 }
398 } else {
399 if (rw_locks_held->rwld_overflow == 0) {
400 held_rwlock_notheld_panic(lock, thread);
401 }
402 }
403 }
404
405 static inline void
406 change_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
407 {
408 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
409
410 if (__probable(rw_lock_debug_disabled())) {
411 return;
412 }
413
414 if (__improbable(rw_locks_held->rwld_locks_saved == 0)) {
415 if (rw_locks_held->rwld_overflow == 0) {
416 held_rwlock_notheld_panic(lock, thread);
417 }
418 return;
419 }
420
421 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
422 if (__probable(entry != NULL)) {
423 if (typeFrom == LCK_RW_TYPE_SHARED) {
424 //We are upgrading
425 assertf(entry->rwlde_mode_count == 1,
426 "RW lock %p not held by a single shared when upgrading "
427 "by %p caller %p read %d state 0x%x owner 0x%p ",
428 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
429 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
430 entry->rwlde_mode_count = -1;
431 set_rwlde_caller_packed(entry, caller);
432 } else {
433 //We are downgrading
434 assertf(entry->rwlde_mode_count == -1,
435 "RW lock %p not held in write mode when downgrading "
436 "by %p caller %p read %d state 0x%x owner 0x%p ",
437 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
438 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
439 entry->rwlde_mode_count = 1;
440 set_rwlde_caller_packed(entry, caller);
441 }
442 return;
443 }
444
445 if (rw_locks_held->rwld_overflow == 0) {
446 held_rwlock_notheld_panic(lock, thread);
447 }
448
449 if (rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER) {
450 //array is full
451 return;
452 }
453
454 struct rw_lock_debug_entry *null_entry = find_empty_slot(rw_locks_held);
455 null_entry->rwlde_lock = lock;
456 set_rwlde_caller_packed(null_entry, caller);
457 if (typeFrom == LCK_RW_TYPE_SHARED) {
458 null_entry->rwlde_mode_count = -1;
459 } else {
460 null_entry->rwlde_mode_count = 1;
461 }
462 rw_locks_held->rwld_locks_saved++;
463 }
464
465 __abortlike
466 static void
467 add_held_rwlock_too_many_panic(thread_t thread)
468 {
469 panic("RW lock too many rw locks held, rwld_locks_acquired maxed out for thread %p", thread);
470 }
471
472 static inline void
473 add_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
474 {
475 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
476 struct rw_lock_debug_entry *null_entry;
477
478 if (__probable(rw_lock_debug_disabled())) {
479 return;
480 }
481
482 if (__improbable(rw_locks_held->rwld_locks_acquired == UINT32_MAX)) {
483 add_held_rwlock_too_many_panic(thread);
484 }
485 rw_locks_held->rwld_locks_acquired++;
486
487 if (type == LCK_RW_TYPE_EXCLUSIVE) {
488 if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
489 //array is full
490 rw_locks_held->rwld_overflow = 1;
491 return;
492 }
493 null_entry = find_empty_slot(rw_locks_held);
494 null_entry->rwlde_lock = lock;
495 set_rwlde_caller_packed(null_entry, caller);
496 null_entry->rwlde_mode_count = -1;
497 rw_locks_held->rwld_locks_saved++;
498 return;
499 } else {
500 if (__probable(rw_locks_held->rwld_locks_saved == 0)) {
501 //array is empty
502 goto add_shared;
503 }
504
505 boolean_t allow_shared_recursive;
506 if (lck_rw_recursive_shared_assert_74048094) {
507 allow_shared_recursive = (lock->lck_rw_priv_excl == 0);
508 } else {
509 allow_shared_recursive = TRUE;
510 }
511 if (allow_shared_recursive) {
512 			//It could already be locked in shared mode
513 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
514 if (entry != NULL) {
515 assert(entry->rwlde_mode_count > 0);
516 assertf(entry->rwlde_mode_count != INT8_MAX,
517 "RW lock %p with too many recursive shared held "
518 "from %p caller %p read %d state 0x%x owner 0x%p",
519 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
520 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
521 entry->rwlde_mode_count += 1;
522 return;
523 }
524 }
525
526 //none of the locks were a match
527 //try to add a new entry
528 if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
529 //array is full
530 rw_locks_held->rwld_overflow = 1;
531 return;
532 }
533
534 add_shared:
535 null_entry = find_empty_slot(rw_locks_held);
536 null_entry->rwlde_lock = lock;
537 set_rwlde_caller_packed(null_entry, caller);
538 null_entry->rwlde_mode_count = 1;
539 rw_locks_held->rwld_locks_saved++;
540 }
541 }
542
543 static inline void
544 remove_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
545 {
546 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
547
548 if (__probable(rw_lock_debug_disabled())) {
549 return;
550 }
551
552 if (__improbable(rw_locks_held->rwld_locks_acquired == 0)) {
553 return;
554 }
555 rw_locks_held->rwld_locks_acquired--;
556
557 if (rw_locks_held->rwld_locks_saved == 0) {
558 assert(rw_locks_held->rwld_overflow == 1);
559 goto out;
560 }
561
562 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
563 if (__probable(entry != NULL)) {
564 if (type == LCK_RW_TYPE_EXCLUSIVE) {
565 assert(entry->rwlde_mode_count == -1);
566 entry->rwlde_mode_count = 0;
567 } else {
568 assert(entry->rwlde_mode_count > 0);
569 entry->rwlde_mode_count--;
570 if (entry->rwlde_mode_count > 0) {
571 goto out;
572 }
573 }
574 entry->rwlde_caller_packed = 0;
575 entry->rwlde_lock = NULL;
576 rw_locks_held->rwld_locks_saved--;
577 } else {
578 assert(rw_locks_held->rwld_overflow == 1);
579 }
580
581 out:
582 if (rw_locks_held->rwld_locks_acquired == 0) {
583 rw_locks_held->rwld_overflow = 0;
584 }
585 return;
586 }
587 #endif /* DEBUG_RW */
588
589 /*
590 * We disable interrupts while holding the RW interlock to prevent an
591 * interrupt from exacerbating hold time.
592 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
593 */
594 static inline boolean_t
595 lck_interlock_lock(
596 lck_rw_t *lck)
597 {
598 boolean_t istate;
599
600 istate = ml_set_interrupts_enabled(FALSE);
601 lck_rw_ilk_lock(lck);
602 return istate;
603 }
604
605 static inline void
606 lck_interlock_unlock(
607 lck_rw_t *lck,
608 boolean_t istate)
609 {
610 lck_rw_ilk_unlock(lck);
611 ml_set_interrupts_enabled(istate);
612 }
613
614 /*
615 * compute the deadline to spin against when
616 * waiting for a change of state on a lck_rw_t
617 */
618 static inline uint64_t
619 lck_rw_deadline_for_spin(
620 lck_rw_t *lck)
621 {
622 lck_rw_word_t word;
623
624 word.data = ordered_load_rw(lck);
625 if (word.can_sleep) {
626 if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
627 /*
628 * there are already threads waiting on this lock... this
629 * implies that they have spun beyond their deadlines waiting for
630 * the desired state to show up so we will not bother spinning at this time...
631 * or
632 * the current number of threads sharing this lock exceeds our capacity to run them
633 * concurrently and since all states we're going to spin for require the rw_shared_count
634 * to be at 0, we'll not bother spinning since the latency for this to happen is
635 * unpredictable...
636 */
637 return mach_absolute_time();
638 }
639 return mach_absolute_time() + os_atomic_load(&MutexSpin, relaxed);
640 } else {
641 return mach_absolute_time() + (100000LL * 1000000000LL);
642 }
643 }
644
645 /*
646 * This inline is used when busy-waiting for an rw lock.
647 * If interrupts were disabled when the lock primitive was called,
648  * we poll the IPI handler for pending TLB flushes on x86.
649 */
650 static inline void
651 lck_rw_lock_pause(
652 boolean_t interrupts_enabled)
653 {
654 #if X86_64
655 if (!interrupts_enabled) {
656 handle_pending_TLB_flushes();
657 }
658 cpu_pause();
659 #else
660 (void) interrupts_enabled;
661 wait_for_event();
662 #endif
663 }
664
665 typedef enum __enum_closed {
666 LCK_RW_DRAIN_S_DRAINED = 0,
667 LCK_RW_DRAIN_S_NOT_DRAINED = 1,
668 LCK_RW_DRAIN_S_EARLY_RETURN = 2,
669 LCK_RW_DRAIN_S_TIMED_OUT = 3,
670 } lck_rw_drain_state_t;
671
672 static lck_rw_drain_state_t
673 lck_rw_drain_status(
674 lck_rw_t *lock,
675 uint32_t status_mask,
676 boolean_t wait,
677 bool (^lock_pause)(void))
678 {
679 uint64_t deadline = 0;
680 uint32_t data;
681 boolean_t istate = FALSE;
682
683 if (wait) {
684 deadline = lck_rw_deadline_for_spin(lock);
685 #if __x86_64__
686 istate = ml_get_interrupts_enabled();
687 #endif
688 }
689
690 for (;;) {
691 #if __x86_64__
692 data = os_atomic_load(&lock->lck_rw_data, relaxed);
693 #else
694 data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
695 #endif
696 if ((data & status_mask) == 0) {
697 atomic_exchange_abort();
698 return LCK_RW_DRAIN_S_DRAINED;
699 }
700
701 if (!wait) {
702 atomic_exchange_abort();
703 return LCK_RW_DRAIN_S_NOT_DRAINED;
704 }
705
706 lck_rw_lock_pause(istate);
707
708 if (mach_absolute_time() >= deadline) {
709 return LCK_RW_DRAIN_S_TIMED_OUT;
710 }
711
712 if (lock_pause && lock_pause()) {
713 return LCK_RW_DRAIN_S_EARLY_RETURN;
714 }
715 }
716 }
717
718 /*
719 * Spin while interlock is held.
720 */
721 static inline void
722 lck_rw_interlock_spin(
723 lck_rw_t *lock)
724 {
725 uint32_t data, prev;
726
727 for (;;) {
728 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_relaxed);
729 if (data & LCK_RW_INTERLOCK) {
730 #if __x86_64__
731 cpu_pause();
732 #else
733 wait_for_event();
734 #endif
735 } else {
736 atomic_exchange_abort();
737 return;
738 }
739 }
740 }
741
742 #define LCK_RW_GRAB_WANT 0
743 #define LCK_RW_GRAB_SHARED 1
744
745 typedef enum __enum_closed __enum_options {
746 LCK_RW_GRAB_F_SHARED = 0x0, // Not really a flag obviously but makes call sites more readable.
747 LCK_RW_GRAB_F_WANT_EXCL = 0x1,
748 LCK_RW_GRAB_F_WAIT = 0x2,
749 } lck_rw_grab_flags_t;
750
751 typedef enum __enum_closed {
752 LCK_RW_GRAB_S_NOT_LOCKED = 0,
753 LCK_RW_GRAB_S_LOCKED = 1,
754 LCK_RW_GRAB_S_EARLY_RETURN = 2,
755 LCK_RW_GRAB_S_TIMED_OUT = 3,
756 } lck_rw_grab_state_t;
757
758 static lck_rw_grab_state_t
759 lck_rw_grab(
760 lck_rw_t *lock,
761 lck_rw_grab_flags_t flags,
762 bool (^lock_pause)(void))
763 {
764 uint64_t deadline = 0;
765 uint32_t data, prev;
766 boolean_t do_exch, istate = FALSE;
767
768 assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
769
770 if ((flags & LCK_RW_GRAB_F_WAIT) != 0) {
771 deadline = lck_rw_deadline_for_spin(lock);
772 #if __x86_64__
773 istate = ml_get_interrupts_enabled();
774 #endif
775 }
776
777 for (;;) {
778 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
779 if (data & LCK_RW_INTERLOCK) {
780 atomic_exchange_abort();
781 lck_rw_interlock_spin(lock);
782 continue;
783 }
784 do_exch = FALSE;
785 if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
786 if ((data & LCK_RW_WANT_EXCL) == 0) {
787 data |= LCK_RW_WANT_EXCL;
788 do_exch = TRUE;
789 }
790 } else { // LCK_RW_GRAB_SHARED
791 if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
792 (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
793 data += LCK_RW_SHARED_READER;
794 do_exch = TRUE;
795 }
796 }
797 if (do_exch) {
798 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
799 return LCK_RW_GRAB_S_LOCKED;
800 }
801 } else {
802 if ((flags & LCK_RW_GRAB_F_WAIT) == 0) {
803 atomic_exchange_abort();
804 return LCK_RW_GRAB_S_NOT_LOCKED;
805 }
806
807 lck_rw_lock_pause(istate);
808
809 if (mach_absolute_time() >= deadline) {
810 return LCK_RW_GRAB_S_TIMED_OUT;
811 }
812 if (lock_pause && lock_pause()) {
813 return LCK_RW_GRAB_S_EARLY_RETURN;
814 }
815 }
816 }
817 }
818
819 /*
820 * The inverse of lck_rw_grab - drops either the LCK_RW_WANT_EXCL bit or
821 * decrements the reader count. Doesn't deal with waking up waiters - i.e.
822 * should only be called when can_sleep is false.
823 */
824 static void
825 lck_rw_drop(lck_rw_t *lock, lck_rw_grab_flags_t flags)
826 {
827 uint32_t data, prev;
828
829 assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
830 assert(!lock->lck_rw_can_sleep);
831
832 for (;;) {
833 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
834
835 /* Interlock should never be taken when can_sleep is false. */
836 assert3u(data & LCK_RW_INTERLOCK, ==, 0);
837
838 if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
839 data &= ~LCK_RW_WANT_EXCL;
840 } else {
841 data -= LCK_RW_SHARED_READER;
842 }
843
844 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
845 break;
846 }
847
848 cpu_pause();
849 }
850
851 return;
852 }
853
854 static boolean_t
855 lck_rw_lock_exclusive_gen(
856 lck_rw_t *lock,
857 bool (^lock_pause)(void))
858 {
859 __assert_only thread_t self = current_thread();
860 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
861 lck_rw_word_t word;
862 int slept = 0;
863 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
864 lck_rw_drain_state_t drain_state = LCK_RW_DRAIN_S_NOT_DRAINED;
865 wait_result_t res = 0;
866 boolean_t istate;
867
868 #if CONFIG_DTRACE
869 boolean_t dtrace_ls_initialized = FALSE;
870 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
871 uint64_t wait_interval = 0;
872 int readers_at_sleep = 0;
873 #endif
874
875 assertf(lock->lck_rw_owner != self->ctid,
876 "Lock already held state=0x%x, owner=%p",
877 ordered_load_rw(lock), self);
878
879 #ifdef DEBUG_RW
880 /*
881 * Best effort attempt to check that this thread
882 * is not already holding the lock (this checks read mode too).
883 */
884 assert_canlock_rwlock(lock, self, LCK_RW_TYPE_EXCLUSIVE);
885 #endif /* DEBUG_RW */
886
887 /*
888 * Try to acquire the lck_rw_want_excl bit.
889 */
890 while (lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL, NULL) != LCK_RW_GRAB_S_LOCKED) {
891 #if CONFIG_DTRACE
892 if (dtrace_ls_initialized == FALSE) {
893 dtrace_ls_initialized = TRUE;
894 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
895 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
896 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
897 if (dtrace_ls_enabled) {
898 /*
899 * Either sleeping or spinning is happening,
900 * start a timing of our delay interval now.
901 */
902 readers_at_sleep = lock->lck_rw_shared_count;
903 wait_interval = mach_absolute_time();
904 }
905 }
906 #endif
907
908 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START,
909 trace_lck, 0, 0, 0, 0);
910
911 grab_state = lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT, lock_pause);
912
913 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END,
914 trace_lck, 0, 0, grab_state, 0);
915
916 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
917 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
918 break;
919 }
920 /*
921 * if we get here, the deadline has expired w/o us
922 * being able to grab the lock exclusively
923 * check to see if we're allowed to do a thread_block
924 */
925 word.data = ordered_load_rw(lock);
926 if (word.can_sleep) {
927 istate = lck_interlock_lock(lock);
928 word.data = ordered_load_rw(lock);
929
930 if (word.want_excl) {
931 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
932
933 word.w_waiting = 1;
934 ordered_store_rw(lock, word.data);
935
936 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
937 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
938 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
939 lck_interlock_unlock(lock, istate);
940 if (res == THREAD_WAITING) {
941 res = thread_block(THREAD_CONTINUE_NULL);
942 slept++;
943 }
944 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
945 } else {
946 word.want_excl = 1;
947 ordered_store_rw(lock, word.data);
948 lck_interlock_unlock(lock, istate);
949 break;
950 }
951 }
952 }
953
954 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
955 assert(lock_pause);
956 return FALSE;
957 }
958
959 /*
960 * Wait for readers (and upgrades) to finish...
961 */
962 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
963 #if CONFIG_DTRACE
964 /*
965 * Either sleeping or spinning is happening, start
966 * a timing of our delay interval now. If we set it
967 * to -1 we don't have accurate data so we cannot later
968 * decide to record a dtrace spin or sleep event.
969 */
970 if (dtrace_ls_initialized == FALSE) {
971 dtrace_ls_initialized = TRUE;
972 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
973 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
974 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
975 if (dtrace_ls_enabled) {
976 /*
977 * Either sleeping or spinning is happening,
978 * start a timing of our delay interval now.
979 */
980 readers_at_sleep = lock->lck_rw_shared_count;
981 wait_interval = mach_absolute_time();
982 }
983 }
984 #endif
985
986 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
987
988 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE, lock_pause);
989
990 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, drain_state, 0);
991
992 if (drain_state == LCK_RW_DRAIN_S_DRAINED ||
993 drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
994 break;
995 }
996 /*
997 * if we get here, the deadline has expired w/o us
998 * being able to grab the lock exclusively
999 * check to see if we're allowed to do a thread_block
1000 */
1001 word.data = ordered_load_rw(lock);
1002 if (word.can_sleep) {
1003 istate = lck_interlock_lock(lock);
1004 word.data = ordered_load_rw(lock);
1005
1006 if (word.shared_count != 0 || word.want_upgrade) {
1007 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1008
1009 word.w_waiting = 1;
1010 ordered_store_rw(lock, word.data);
1011
1012 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1013 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1014 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1015 lck_interlock_unlock(lock, istate);
1016
1017 if (res == THREAD_WAITING) {
1018 res = thread_block(THREAD_CONTINUE_NULL);
1019 slept++;
1020 }
1021 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1022 } else {
1023 lck_interlock_unlock(lock, istate);
1024 /*
1025 * must own the lock now, since we checked for
1026 * readers or upgrade owner behind the interlock
1027 * no need for a call to 'lck_rw_drain_status'
1028 */
1029 break;
1030 }
1031 }
1032 }
1033
1034 #if CONFIG_DTRACE
1035 /*
1036 * Decide what latencies we suffered that are Dtrace events.
1037 * If we have set wait_interval, then we either spun or slept.
1038 * At least we get out from under the interlock before we record
1039 * which is the best we can do here to minimize the impact
1040 * of the tracing.
1041 * If we have set wait_interval to -1, then dtrace was not enabled when we
1042 * started sleeping/spinning so we don't record this event.
1043 */
1044 if (dtrace_ls_enabled == TRUE) {
1045 if (slept == 0) {
1046 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
1047 mach_absolute_time() - wait_interval, 1);
1048 } else {
1049 /*
1050 			 * For the blocking case, we also record whether, when we blocked,
1051 			 * it was held for read or write, and how many readers there were.
1052 * Notice that above we recorded this before we dropped
1053 * the interlock so the count is accurate.
1054 */
1055 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
1056 mach_absolute_time() - wait_interval, 1,
1057 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1058 }
1059 }
1060 #endif /* CONFIG_DTRACE */
1061
1062 if (drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
1063 lck_rw_drop(lock, LCK_RW_GRAB_F_WANT_EXCL);
1064 assert(lock_pause);
1065 return FALSE;
1066 }
1067
1068 #if CONFIG_DTRACE
1069 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
1070 #endif /* CONFIG_DTRACE */
1071
1072 return TRUE;
1073 }
1074
1075 #define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
1076 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
1077 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
1078 /*!
1079 * @function lck_rw_lock_exclusive_check_contended
1080 *
1081 * @abstract
1082 * Locks a rw_lock in exclusive mode.
1083 *
1084 * @discussion
1085 * This routine IS EXPERIMENTAL.
1086 * It's only used for the vm object lock, and use for other subsystems is UNSUPPORTED.
1087 * Note that the return value is ONLY A HEURISTIC w.r.t. the lock's contention.
1088 *
1089 * @param lock rw_lock to lock.
1090 *
1091 * @returns Returns TRUE if the thread spun or blocked while attempting to acquire the lock, FALSE
1092 * otherwise.
1093 */
1094 bool
1095 lck_rw_lock_exclusive_check_contended(
1096 lck_rw_t *lock)
1097 {
1098 thread_t thread = current_thread();
1099 bool contended = false;
1100
1101 if (lock->lck_rw_can_sleep) {
1102 lck_rw_lock_count_inc(thread, lock);
1103 } else if (get_preemption_level() == 0) {
1104 panic("Taking non-sleepable RW lock with preemption enabled");
1105 }
1106
1107 if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
1108 #if CONFIG_DTRACE
1109 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1110 #endif /* CONFIG_DTRACE */
1111 } else {
1112 contended = true;
1113 (void) lck_rw_lock_exclusive_gen(lock, NULL);
1114 }
1115 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1116 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1117 ordered_store_rw_owner(lock, thread->ctid);
1118
1119 #ifdef DEBUG_RW
1120 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, __builtin_return_address(0));
1121 #endif /* DEBUG_RW */
1122 return contended;
1123 }
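
/*
 * Illustrative sketch only (this interface is effectively reserved for the
 * VM object lock): the return value is merely a contention hint, e.g. for a
 * hypothetical statistics counter.
 *
 *	if (lck_rw_lock_exclusive_check_contended(lock)) {
 *		object_lock_contention_count++;   // hypothetical counter
 *	}
 *	... exclusive critical section ...
 *	lck_rw_unlock_exclusive(lock);
 */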
1124
1125 __attribute__((always_inline))
1126 static boolean_t
1127 lck_rw_lock_exclusive_internal_inline(
1128 lck_rw_t *lock,
1129 void *caller,
1130 bool (^lock_pause)(void))
1131 {
1132 #pragma unused(caller)
1133 thread_t thread = current_thread();
1134
1135 if (lock->lck_rw_can_sleep) {
1136 lck_rw_lock_count_inc(thread, lock);
1137 } else if (get_preemption_level() == 0) {
1138 panic("Taking non-sleepable RW lock with preemption enabled");
1139 }
1140
1141 if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
1142 #if CONFIG_DTRACE
1143 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1144 #endif /* CONFIG_DTRACE */
1145 } else if (!lck_rw_lock_exclusive_gen(lock, lock_pause)) {
1146 /*
1147 * lck_rw_lock_exclusive_gen() should only return
1148 * early if lock_pause has been passed and
1149 * returns FALSE. lock_pause is exclusive with
1150 * lck_rw_can_sleep().
1151 */
1152 assert(!lock->lck_rw_can_sleep);
1153 return FALSE;
1154 }
1155
1156 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1157 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1158 ordered_store_rw_owner(lock, thread->ctid);
1159
1160 #if DEBUG_RW
1161 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1162 #endif /* DEBUG_RW */
1163
1164 return TRUE;
1165 }
1166
1167 __attribute__((noinline))
1168 static void
1169 lck_rw_lock_exclusive_internal(
1170 lck_rw_t *lock,
1171 void *caller)
1172 {
1173 (void) lck_rw_lock_exclusive_internal_inline(lock, caller, NULL);
1174 }
1175
1176 /*!
1177 * @function lck_rw_lock_exclusive
1178 *
1179 * @abstract
1180 * Locks a rw_lock in exclusive mode.
1181 *
1182 * @discussion
1183 * This function can block.
1184 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1185 * can acquire it in exclusive mode.
1186 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1187 *
1188 * @param lock rw_lock to lock.
1189 */
1190 void
1191 lck_rw_lock_exclusive(
1192 lck_rw_t *lock)
1193 {
1194 (void) lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), NULL);
1195 }
1196
1197 /*!
1198 * @function lck_rw_lock_exclusive_b
1199 *
1200 * @abstract
1201 * Locks a rw_lock in exclusive mode. Returns early if the lock can't be acquired
1202 * and the specified block returns true.
1203 *
1204 * @discussion
1205 * Identical to lck_rw_lock_exclusive() but can return early if the lock can't be
1206 * acquired and the specified block returns true. The block is called
1207 * repeatedly when waiting to acquire the lock.
1208 * Should only be called when the lock cannot sleep (i.e. when
1209 * lock->lck_rw_can_sleep is false).
1210 *
1211 * @param lock rw_lock to lock.
1212 * @param lock_pause block invoked while waiting to acquire lock
1213 *
1214 * @returns Returns TRUE if the lock is successfully taken,
1215 * FALSE if the block returns true and the lock has
1216 * not been acquired.
1217 */
1218 boolean_t
1219 lck_rw_lock_exclusive_b(
1220 lck_rw_t *lock,
1221 bool (^lock_pause)(void))
1222 {
1223 assert(!lock->lck_rw_can_sleep);
1224
1225 return lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), lock_pause);
1226 }
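
/*
 * Illustrative sketch (assumes a lock initialized as non-sleepable and a
 * hypothetical "abort_requested" flag): stop waiting once the caller is
 * asked to bail out.
 *
 *	boolean_t locked = lck_rw_lock_exclusive_b(lock, ^bool (void) {
 *		return os_atomic_load(&abort_requested, relaxed);
 *	});
 *	if (locked) {
 *		... exclusive critical section ...
 *		lck_rw_unlock_exclusive(lock);
 *	}
 */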
1227
1228 /*
1229 * Routine: lck_rw_lock_shared_gen
1230 * Function:
1231 * Fast path code has determined that this lock
1232 * is held exclusively... this is where we spin/block
1233 * until we can acquire the lock in the shared mode
1234 */
1235 static boolean_t
1236 lck_rw_lock_shared_gen(
1237 lck_rw_t *lck,
1238 bool (^lock_pause)(void))
1239 {
1240 __assert_only thread_t self = current_thread();
1241 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1242 lck_rw_word_t word;
1243 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
1244 int slept = 0;
1245 wait_result_t res = 0;
1246 boolean_t istate;
1247
1248 #if CONFIG_DTRACE
1249 uint64_t wait_interval = 0;
1250 int readers_at_sleep = 0;
1251 boolean_t dtrace_ls_initialized = FALSE;
1252 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1253 #endif /* CONFIG_DTRACE */
1254
1255 assertf(lck->lck_rw_owner != self->ctid,
1256 "Lock already held state=0x%x, owner=%p",
1257 ordered_load_rw(lck), self);
1258
1259 #ifdef DEBUG_RW
1260 /*
1261 * Best effort attempt to check that this thread
1262 * is not already holding the lock in shared mode.
1263 */
1264 assert_canlock_rwlock(lck, self, LCK_RW_TYPE_SHARED);
1265 #endif
1266
1267 while (lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED, NULL) != LCK_RW_GRAB_S_LOCKED) {
1268 #if CONFIG_DTRACE
1269 if (dtrace_ls_initialized == FALSE) {
1270 dtrace_ls_initialized = TRUE;
1271 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1272 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1273 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1274 if (dtrace_ls_enabled) {
1275 /*
1276 * Either sleeping or spinning is happening,
1277 * start a timing of our delay interval now.
1278 */
1279 readers_at_sleep = lck->lck_rw_shared_count;
1280 wait_interval = mach_absolute_time();
1281 }
1282 }
1283 #endif
1284
1285 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1286 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
1287
1288 grab_state = lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED | LCK_RW_GRAB_F_WAIT, lock_pause);
1289
1290 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1291 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, grab_state, 0);
1292
1293 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
1294 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
1295 break;
1296 }
1297
1298 /*
1299 * if we get here, the deadline has expired w/o us
1300 * being able to grab the lock for read
1301 * check to see if we're allowed to do a thread_block
1302 */
1303 if (lck->lck_rw_can_sleep) {
1304 istate = lck_interlock_lock(lck);
1305
1306 word.data = ordered_load_rw(lck);
1307 if ((word.want_excl || word.want_upgrade) &&
1308 ((word.shared_count == 0) || word.priv_excl)) {
1309 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1310 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
1311
1312 word.r_waiting = 1;
1313 ordered_store_rw(lck, word.data);
1314
1315 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
1316 res = assert_wait(LCK_RW_READER_EVENT(lck),
1317 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1318 lck_interlock_unlock(lck, istate);
1319
1320 if (res == THREAD_WAITING) {
1321 res = thread_block(THREAD_CONTINUE_NULL);
1322 slept++;
1323 }
1324 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1325 trace_lck, res, slept, 0, 0);
1326 } else {
1327 word.shared_count++;
1328 ordered_store_rw(lck, word.data);
1329 lck_interlock_unlock(lck, istate);
1330 break;
1331 }
1332 }
1333 }
1334
1335 #if CONFIG_DTRACE
1336 if (dtrace_ls_enabled == TRUE) {
1337 if (slept == 0) {
1338 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1339 } else {
1340 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1341 mach_absolute_time() - wait_interval, 0,
1342 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1343 }
1344 }
1345 #endif /* CONFIG_DTRACE */
1346
1347 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
1348 assert(lock_pause);
1349 return FALSE;
1350 }
1351
1352 #if CONFIG_DTRACE
1353 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1354 #endif /* CONFIG_DTRACE */
1355
1356 return TRUE;
1357 }
1358
1359 __attribute__((always_inline))
1360 static boolean_t
1361 lck_rw_lock_shared_internal_inline(
1362 lck_rw_t *lock,
1363 void *caller,
1364 bool (^lock_pause)(void))
1365 {
1366 #pragma unused(caller)
1367
1368 uint32_t data, prev;
1369 thread_t thread = current_thread();
1370 #ifdef DEBUG_RW
1371 boolean_t check_canlock = TRUE;
1372 #endif
1373
1374 if (lock->lck_rw_can_sleep) {
1375 lck_rw_lock_count_inc(thread, lock);
1376 } else if (get_preemption_level() == 0) {
1377 panic("Taking non-sleepable RW lock with preemption enabled");
1378 }
1379
1380 for (;;) {
1381 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1382 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1383 atomic_exchange_abort();
1384 if (!lck_rw_lock_shared_gen(lock, lock_pause)) {
1385 /*
1386 * lck_rw_lock_shared_gen() should only return
1387 * early if lock_pause has been passed and
1388 * returns FALSE. lock_pause is exclusive with
1389 * lck_rw_can_sleep().
1390 */
1391 assert(!lock->lck_rw_can_sleep);
1392 return FALSE;
1393 }
1394
1395 goto locked;
1396 }
1397 #ifdef DEBUG_RW
1398 if ((data & LCK_RW_SHARED_MASK) == 0) {
1399 /*
1400 * If the lock is uncontended,
1401 * we do not need to check if we can lock it
1402 */
1403 check_canlock = FALSE;
1404 }
1405 #endif
1406 data += LCK_RW_SHARED_READER;
1407 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1408 break;
1409 }
1410 cpu_pause();
1411 }
1412 #ifdef DEBUG_RW
1413 if (check_canlock) {
1414 /*
1415 * Best effort attempt to check that this thread
1416 * is not already holding the lock (this checks read mode too).
1417 */
1418 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1419 }
1420 #endif
1421 locked:
1422 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1423 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1424
1425 #if CONFIG_DTRACE
1426 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1427 #endif /* CONFIG_DTRACE */
1428
1429 #ifdef DEBUG_RW
1430 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1431 #endif /* DEBUG_RW */
1432
1433 return TRUE;
1434 }
1435
1436 __attribute__((noinline))
1437 static void
1438 lck_rw_lock_shared_internal(
1439 lck_rw_t *lock,
1440 void *caller)
1441 {
1442 (void) lck_rw_lock_shared_internal_inline(lock, caller, NULL);
1443 }
1444
1445 /*!
1446 * @function lck_rw_lock_shared
1447 *
1448 * @abstract
1449 * Locks a rw_lock in shared mode.
1450 *
1451 * @discussion
1452 * This function can block.
1453 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1454 * can acquire it in exclusive mode.
1455 * If the lock is held in shared mode and there are no writers waiting, a reader will be able to acquire
1456 * the lock without waiting.
1457  * If the lock is held in shared mode and there is at least one writer waiting, a reader will wait
1458  * for all the writers to make progress if the lock was initialized with the default settings. If instead
1459  * RW_SHARED_PRIORITY was selected at initialization time, a reader will never wait if the lock is held
1460 * in shared mode.
1461 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1462 *
1463 * @param lock rw_lock to lock.
1464 */
1465 void
1466 lck_rw_lock_shared(
1467 lck_rw_t *lock)
1468 {
1469 (void) lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), NULL);
1470 }
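
/*
 * Illustrative sketch of the common read-side pattern ("my_obj" is a
 * hypothetical structure embedding the lock):
 *
 *	lck_rw_lock_shared(&my_obj->mo_rwlock);
 *	... read-only access to the protected state ...
 *	lck_rw_unlock_shared(&my_obj->mo_rwlock);
 */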
1471
1472 /*!
1473 * @function lck_rw_lock_shared_b
1474 *
1475 * @abstract
1476 * Locks a rw_lock in shared mode. Returns early if the lock can't be acquired
1477 * and the specified block returns true.
1478 *
1479 * @discussion
1480 * Identical to lck_rw_lock_shared() but can return early if the lock can't be
1481 * acquired and the specified block returns true. The block is called
1482 * repeatedly when waiting to acquire the lock.
1483 * Should only be called when the lock cannot sleep (i.e. when
1484 * lock->lck_rw_can_sleep is false).
1485 *
1486 * @param lock rw_lock to lock.
1487 * @param lock_pause block invoked while waiting to acquire lock
1488 *
1489 * @returns Returns TRUE if the lock is successfully taken,
1490 * FALSE if the block returns true and the lock has
1491 * not been acquired.
1492 */
1493 boolean_t
1494 lck_rw_lock_shared_b(
1495 lck_rw_t *lock,
1496 bool (^lock_pause)(void))
1497 {
1498 assert(!lock->lck_rw_can_sleep);
1499
1500 return lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), lock_pause);
1501 }
1502
1503 /*
1504 * Routine: lck_rw_lock_shared_to_exclusive_failure
1505 * Function:
1506 * Fast path code has already dropped our read
1507 * count and determined that someone else owns 'lck_rw_want_upgrade'
1508  * if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
1509 * all we need to do here is determine if a wakeup is needed
1510 */
1511 static boolean_t
1512 lck_rw_lock_shared_to_exclusive_failure(
1513 lck_rw_t *lck,
1514 uint32_t prior_lock_state)
1515 {
1516 thread_t thread = current_thread();
1517
1518 if ((prior_lock_state & LCK_RW_W_WAITING) &&
1519 ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
1520 /*
1521 * Someone else has requested upgrade.
1522 * Since we've released the read lock, wake
1523 * him up if he's blocked waiting
1524 */
1525 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1526 }
1527
1528 /* Check if dropping the lock means that we need to unpromote */
1529 if (lck->lck_rw_can_sleep) {
1530 lck_rw_lock_count_dec(thread, lck);
1531 }
1532
1533 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1534 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1535
1536 #ifdef DEBUG_RW
1537 remove_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
1538 #endif /* DEBUG_RW */
1539
1540 return FALSE;
1541 }
1542
1543 /*
1544 * Routine: lck_rw_lock_shared_to_exclusive_success
1545 * Function:
1546 * the fast path code has already dropped our read
1547 * count and successfully acquired 'lck_rw_want_upgrade'
1548 * we just need to wait for the rest of the readers to drain
1549 * and then we can return as the exclusive holder of this lock
1550 */
1551 static void
1552 lck_rw_lock_shared_to_exclusive_success(
1553 lck_rw_t *lock)
1554 {
1555 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1556 int slept = 0;
1557 lck_rw_word_t word;
1558 wait_result_t res;
1559 boolean_t istate;
1560 lck_rw_drain_state_t drain_state;
1561
1562 #if CONFIG_DTRACE
1563 uint64_t wait_interval = 0;
1564 int readers_at_sleep = 0;
1565 boolean_t dtrace_ls_initialized = FALSE;
1566 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1567 #endif
1568
1569 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
1570 word.data = ordered_load_rw(lock);
1571 #if CONFIG_DTRACE
1572 if (dtrace_ls_initialized == FALSE) {
1573 dtrace_ls_initialized = TRUE;
1574 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1575 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1576 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1577 if (dtrace_ls_enabled) {
1578 /*
1579 * Either sleeping or spinning is happening,
1580 * start a timing of our delay interval now.
1581 */
1582 readers_at_sleep = word.shared_count;
1583 wait_interval = mach_absolute_time();
1584 }
1585 }
1586 #endif
1587
1588 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1589 trace_lck, word.shared_count, 0, 0, 0);
1590
1591 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE, NULL);
1592
1593 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1594 trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
1595
1596 if (drain_state == LCK_RW_DRAIN_S_DRAINED) {
1597 break;
1598 }
1599
1600 /*
1601  * if we get here, the spin deadline in lck_rw_drain_status()
1602 * has expired w/o the rw_shared_count having drained to 0
1603 * check to see if we're allowed to do a thread_block
1604 */
1605 if (word.can_sleep) {
1606 istate = lck_interlock_lock(lock);
1607
1608 word.data = ordered_load_rw(lock);
1609 if (word.shared_count != 0) {
1610 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1611 trace_lck, word.shared_count, 0, 0, 0);
1612
1613 word.w_waiting = 1;
1614 ordered_store_rw(lock, word.data);
1615
1616 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
1617 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1618 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1619 lck_interlock_unlock(lock, istate);
1620
1621 if (res == THREAD_WAITING) {
1622 res = thread_block(THREAD_CONTINUE_NULL);
1623 slept++;
1624 }
1625 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1626 trace_lck, res, slept, 0, 0);
1627 } else {
1628 lck_interlock_unlock(lock, istate);
1629 break;
1630 }
1631 }
1632 }
1633 #if CONFIG_DTRACE
1634 /*
1635  * We infer whether we took the sleep/spin path above by checking dtrace_ls_enabled.
1636 */
1637 if (dtrace_ls_enabled == TRUE) {
1638 if (slept == 0) {
1639 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
1640 } else {
1641 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
1642 mach_absolute_time() - wait_interval, 1,
1643 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1644 }
1645 }
1646 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
1647 #endif
1648 }
1649
1650 /*!
1651 * @function lck_rw_lock_shared_to_exclusive
1652 *
1653 * @abstract
1654 * Upgrades a rw_lock held in shared mode to exclusive.
1655 *
1656 * @discussion
1657 * This function can block.
1658  * Only one reader at a time can upgrade to exclusive mode. If the upgrade fails, the function will
1659 * return with the lock not held.
1660 * The caller needs to hold the lock in shared mode to upgrade it.
1661 *
1662 * @param lock rw_lock already held in shared mode to upgrade.
1663 *
1664 * @returns TRUE if the lock was upgraded, FALSE if it was not possible.
1665 * If the function was not able to upgrade the lock, the lock will be dropped
1666 * by the function.
1667 */
1668 boolean_t
1669 lck_rw_lock_shared_to_exclusive(
1670 lck_rw_t *lock)
1671 {
1672 thread_t thread = current_thread();
1673 uint32_t data, prev;
1674
1675 assertf(lock->lck_rw_priv_excl != 0, "lock %p thread %p", lock, current_thread());
1676
1677 #if DEBUG_RW
1678 assert_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1679 #endif /* DEBUG_RW */
1680
1681 for (;;) {
1682 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1683 if (data & LCK_RW_INTERLOCK) {
1684 atomic_exchange_abort();
1685 lck_rw_interlock_spin(lock);
1686 continue;
1687 }
1688 if (data & LCK_RW_WANT_UPGRADE) {
1689 data -= LCK_RW_SHARED_READER;
1690 if ((data & LCK_RW_SHARED_MASK) == 0) { /* we were the last reader */
1691 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1692 }
1693 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1694 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1695 }
1696 } else {
1697 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1698 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1699 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1700 break;
1701 }
1702 }
1703 cpu_pause();
1704 }
1705 /* we now own the WANT_UPGRADE */
1706 if (data & LCK_RW_SHARED_MASK) { /* check to see if all of the readers are drained */
1707 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1708 }
1709
1710 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1711 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1712
1713 ordered_store_rw_owner(lock, thread->ctid);
1714 #if CONFIG_DTRACE
1715 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1716 #endif /* CONFIG_DTRACE */
1717
1718 #if DEBUG_RW
1719 change_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, __builtin_return_address(0));
1720 #endif /* DEBUG_RW */
1721 return TRUE;
1722 }
1723
1724 /*
1725 * Routine: lck_rw_lock_exclusive_to_shared_gen
1726 * Function:
1727 * The fast path has already dropped
1728 * our exclusive state and bumped lck_rw_shared_count;
1729 * all we need to do here is determine if anyone
1730 * needs to be awakened.
1731 */
1732 static void
1733 lck_rw_lock_exclusive_to_shared_gen(
1734 lck_rw_t *lck,
1735 uint32_t prior_lock_state,
1736 void *caller)
1737 {
1738 #pragma unused(caller)
1739 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1740 lck_rw_word_t fake_lck;
1741
1742 /*
1743 * prior_lock state is a snapshot of the 1st word of the
1744 * lock in question... we'll fake up a pointer to it
1745 * and carefully not access anything beyond what's defined
1746 * in the first word of a lck_rw_t
1747 */
1748 fake_lck.data = prior_lock_state;
1749
1750 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1751 trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);
1752
1753 /*
1754 * don't wake up anyone waiting to take the lock exclusively
1755 * since we hold a read count... when the read count drops to 0,
1756 * the writers will be woken.
1757 *
1758 * wake up any waiting readers if we don't have any writers waiting,
1759 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1760 */
1761 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
1762 thread_wakeup(LCK_RW_READER_EVENT(lck));
1763 }
1764
1765 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1766 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1767
1768 #if CONFIG_DTRACE
1769 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1770 #endif
1771
1772 #if DEBUG_RW
1773 thread_t thread = current_thread();
1774 change_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1775 #endif /* DEBUG_RW */
1776 }
1777
1778 /*!
1779 * @function lck_rw_lock_exclusive_to_shared
1780 *
1781 * @abstract
1782 * Downgrades a rw_lock held in exclusive mode to shared.
1783 *
1784 * @discussion
1785 * The caller needs to hold the lock in exclusive mode to be able to downgrade it.
1786 *
1787 * @param lock rw_lock already held in exclusive mode to downgrade.
1788 */
1789 void
1790 lck_rw_lock_exclusive_to_shared(
1791 lck_rw_t *lock)
1792 {
1793 uint32_t data, prev;
1794
1795 assertf(lock->lck_rw_owner == current_thread()->ctid,
1796 "state=0x%x, owner=%p", lock->lck_rw_data,
1797 ctid_get_thread_unsafe(lock->lck_rw_owner));
1798 ordered_store_rw_owner(lock, 0);
1799
1800 for (;;) {
1801 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1802 if (data & LCK_RW_INTERLOCK) {
1803 atomic_exchange_abort();
1804 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
1805 continue;
1806 }
1807 data += LCK_RW_SHARED_READER;
1808 if (data & LCK_RW_WANT_UPGRADE) {
1809 data &= ~(LCK_RW_WANT_UPGRADE);
1810 } else {
1811 data &= ~(LCK_RW_WANT_EXCL);
1812 }
1813 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
1814 data &= ~(LCK_RW_W_WAITING);
1815 }
1816 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
1817 break;
1818 }
1819 cpu_pause();
1820 }
1821 lck_rw_lock_exclusive_to_shared_gen(lock, prev, __builtin_return_address(0));
1822 }
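
/*
 * Illustrative sketch (obj and its helpers are hypothetical): a writer that
 * still needs read access after its update can downgrade in place, which
 * lets other readers in without opening a window for another writer:
 *
 *	lck_rw_lock_exclusive(&obj->lock);
 *	update(obj);
 *	lck_rw_lock_exclusive_to_shared(&obj->lock);
 *	consume(obj);
 *	lck_rw_unlock_shared(&obj->lock);
 */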
1823
1824 /*
1825 * Very sad hack, but the codegen for lck_rw_lock
1826 * is very unhappy with the combination of __builtin_return_address()
1827 * and a noreturn function. For some reason it adds more frames
1828 * than it should. rdar://76570684
1829 */
1830 void
1831 _lck_rw_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
1832 #pragma clang diagnostic push
1833 #pragma clang diagnostic ignored "-Wmissing-noreturn"
1834 __attribute__((noinline, weak))
1835 void
1836 _lck_rw_lock_type_panic(
1837 lck_rw_t *lck,
1838 lck_rw_type_t lck_rw_type)
1839 {
1840 panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
1841 }
1842 #pragma clang diagnostic pop
1843
1844 /*!
1845 * @function lck_rw_lock
1846 *
1847 * @abstract
1848 * Locks a rw_lock with the specified type.
1849 *
1850 * @discussion
1851 * See lck_rw_lock_shared() or lck_rw_lock_exclusive() for more details.
1852 *
1853 * @param lck rw_lock to lock.
1854 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
1855 */
1856 void
1857 lck_rw_lock(
1858 lck_rw_t *lck,
1859 lck_rw_type_t lck_rw_type)
1860 {
1861 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1862 return lck_rw_lock_shared_internal(lck, __builtin_return_address(0));
1863 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1864 return lck_rw_lock_exclusive_internal(lck, __builtin_return_address(0));
1865 }
1866 _lck_rw_lock_type_panic(lck, lck_rw_type);
1867 }
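
/*
 * Illustrative sketch (obj is hypothetical): the typed form pairs naturally
 * with lck_rw_done() (defined later in this file), which reports the mode
 * that was held, so a lock can be dropped and later re-taken in the same
 * mode:
 *
 *	lck_rw_type_t mode = lck_rw_done(&obj->lock);
 *	...
 *	lck_rw_lock(&obj->lock, mode);
 */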
1868
1869 __attribute__((always_inline))
1870 static boolean_t
1871 lck_rw_try_lock_shared_internal_inline(
1872 lck_rw_t *lock,
1873 void *caller)
1874 {
1875 #pragma unused(caller)
1876
1877 uint32_t data, prev;
1878 thread_t thread = current_thread();
1879 #ifdef DEBUG_RW
1880 boolean_t check_canlock = TRUE;
1881 #endif
1882
1883 for (;;) {
1884 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1885 if (data & LCK_RW_INTERLOCK) {
1886 atomic_exchange_abort();
1887 lck_rw_interlock_spin(lock);
1888 continue;
1889 }
1890 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1891 atomic_exchange_abort();
1892 return FALSE; /* lock is busy */
1893 }
1894 #ifdef DEBUG_RW
1895 if ((data & LCK_RW_SHARED_MASK) == 0) {
1896 /*
1897 * If the lock is uncontended,
1898 * we do not need to check if we can lock it
1899 */
1900 check_canlock = FALSE;
1901 }
1902 #endif
1903 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1904 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1905 break;
1906 }
1907 cpu_pause();
1908 }
1909 #ifdef DEBUG_RW
1910 if (check_canlock) {
1911 /*
1912 * Best effort attempt to check that this thread
1913 * is not already holding the lock (this checks read mode too).
1914 */
1915 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1916 }
1917 #endif
1918 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1919 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1920
1921 if (lock->lck_rw_can_sleep) {
1922 lck_rw_lock_count_inc(thread, lock);
1923 } else if (get_preemption_level() == 0) {
1924 panic("Taking non-sleepable RW lock with preemption enabled");
1925 }
1926
1927 #if CONFIG_DTRACE
1928 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1929 #endif /* CONFIG_DTRACE */
1930
1931 #ifdef DEBUG_RW
1932 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1933 #endif /* DEBUG_RW */
1934 return TRUE;
1935 }
1936
1937 __attribute__((noinline))
1938 static boolean_t
1939 lck_rw_try_lock_shared_internal(
1940 lck_rw_t *lock,
1941 void *caller)
1942 {
1943 return lck_rw_try_lock_shared_internal_inline(lock, caller);
1944 }
1945
1946 /*!
1947 * @function lck_rw_try_lock_shared
1948 *
1949 * @abstract
1950 * Tries to lock a rw_lock in read mode.
1951 *
1952 * @discussion
1953 * This function returns immediately (without blocking) if the lock cannot be acquired in shared mode.
1954 * See lck_rw_lock_shared for more details.
1955 *
1956 * @param lock rw_lock to lock.
1957 *
1958 * @returns TRUE if the lock is successfully acquired, FALSE if it is held in a conflicting mode.
1959 */
1960 boolean_t
1961 lck_rw_try_lock_shared(
1962 lck_rw_t *lock)
1963 {
1964 return lck_rw_try_lock_shared_internal_inline(lock, __builtin_return_address(0));
1965 }
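
/*
 * Illustrative sketch (obj, sample() and the skipped counter are
 * hypothetical): a shared try-lock lets best-effort work be skipped rather
 * than blocking behind a writer:
 *
 *	if (lck_rw_try_lock_shared(&obj->lock)) {
 *		sample(obj);
 *		lck_rw_unlock_shared(&obj->lock);
 *	} else {
 *		skipped_samples++;
 *	}
 */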
1966
1967 __attribute__((always_inline))
1968 static boolean_t
1969 lck_rw_try_lock_exclusive_internal_inline(
1970 lck_rw_t *lock,
1971 void *caller)
1972 {
1973 #pragma unused(caller)
1974 uint32_t data, prev;
1975
1976 for (;;) {
1977 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1978 if (data & LCK_RW_INTERLOCK) {
1979 atomic_exchange_abort();
1980 lck_rw_interlock_spin(lock);
1981 continue;
1982 }
1983 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1984 atomic_exchange_abort();
1985 return FALSE;
1986 }
1987 data |= LCK_RW_WANT_EXCL;
1988 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1989 break;
1990 }
1991 cpu_pause();
1992 }
1993 thread_t thread = current_thread();
1994
1995 if (lock->lck_rw_can_sleep) {
1996 lck_rw_lock_count_inc(thread, lock);
1997 } else if (get_preemption_level() == 0) {
1998 panic("Taking non-sleepable RW lock with preemption enabled");
1999 }
2000
2001 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
2002 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
2003
2004 ordered_store_rw_owner(lock, thread->ctid);
2005 #if CONFIG_DTRACE
2006 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
2007 #endif /* CONFIG_DTRACE */
2008
2009 #ifdef DEBUG_RW
2010 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
2011 #endif /* DEBUG_RW */
2012 return TRUE;
2013 }
2014
2015 __attribute__((noinline))
2016 static boolean_t
2017 lck_rw_try_lock_exclusive_internal(
2018 lck_rw_t *lock,
2019 void *caller)
2020 {
2021 return lck_rw_try_lock_exclusive_internal_inline(lock, caller);
2022 }
2023
2024 /*!
2025 * @function lck_rw_try_lock_exclusive
2026 *
2027 * @abstract
2028 * Tries to lock a rw_lock in write mode.
2029 *
2030 * @discussion
2031 * This function returns immediately (without blocking) if the lock is already held in any mode.
2032 * See lck_rw_lock_exclusive for more details.
2033 *
2034 * @param lock rw_lock to lock.
2035 *
2036 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
2037 */
2038 boolean_t
2039 lck_rw_try_lock_exclusive(
2040 lck_rw_t *lock)
2041 {
2042 return lck_rw_try_lock_exclusive_internal_inline(lock, __builtin_return_address(0));
2043 }
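
/*
 * Illustrative sketch (a and b are hypothetical objects): an exclusive
 * try-lock is a common way to avoid a lock-ordering violation, backing off
 * and retrying in the safe order instead of blocking on the second lock:
 *
 *	lck_rw_lock_exclusive(&a->lock);
 *	if (!lck_rw_try_lock_exclusive(&b->lock)) {
 *		lck_rw_unlock_exclusive(&a->lock);
 *		return EAGAIN;
 *	}
 */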
2044
2045 /*
2046 * Very sad hack, but the codegen for lck_rw_try_lock
2047 * is very unhappy with the combination of __builtin_return_address()
2048 * and a noreturn function. For some reason it adds more frames
2049 * than it should. rdar://76570684
2050 */
2051 boolean_t
2052 _lck_rw_try_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
2053 #pragma clang diagnostic push
2054 #pragma clang diagnostic ignored "-Wmissing-noreturn"
2055 __attribute__((noinline, weak))
2056 boolean_t
2057 _lck_rw_try_lock_type_panic(
2058 lck_rw_t *lck,
2059 lck_rw_type_t lck_rw_type)
2060 {
2061 panic("lck_rw_try_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
2062 }
2063 #pragma clang diagnostic pop
2064
2065 /*!
2066 * @function lck_rw_try_lock
2067 *
2068 * @abstract
2069 * Tries to lock a rw_lock with the specified type.
2070 *
2071 * @discussion
2072 * This function will return and not wait/block in case the lock is already held.
2073 * See lck_rw_try_lock_shared() or lck_rw_try_lock_exclusive() for more details.
2074 *
2075 * @param lck rw_lock to lock.
2076 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2077 *
2078 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
2079 */
2080 boolean_t
2081 lck_rw_try_lock(
2082 lck_rw_t *lck,
2083 lck_rw_type_t lck_rw_type)
2084 {
2085 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2086 return lck_rw_try_lock_shared_internal(lck, __builtin_return_address(0));
2087 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2088 return lck_rw_try_lock_exclusive_internal(lck, __builtin_return_address(0));
2089 }
2090 return _lck_rw_try_lock_type_panic(lck, lck_rw_type);
2091 }
2092
2093 /*
2094 * Routine: lck_rw_done_gen
2095 *
2096 * prior_lock_state is the value in the 1st
2097 * word of the lock at the time of a successful
2098 * atomic compare and exchange with the new value...
2099 * it represents the state of the lock before we
2100 * decremented the rw_shared_count or cleared either
2101 * rw_want_upgrade or rw_want_excl and
2102 * the lck_x_waiting bits... since the wrapper
2103 * routine has already changed the state atomically,
2104 * we just need to decide if we should
2105 * wake up anyone and what value to return... we do
2106 * this by examining the state of the lock before
2107 * we changed it
2108 */
2109 static lck_rw_type_t
2110 lck_rw_done_gen(
2111 lck_rw_t *lck,
2112 uint32_t prior_lock_state)
2113 {
2114 lck_rw_word_t fake_lck;
2115 lck_rw_type_t lock_type;
2116 thread_t thread;
2117
2118 /*
2119 * prior_lock state is a snapshot of the 1st word of the
2120 * lock in question... we'll fake up a pointer to it
2121 * and carefully not access anything beyond what's defined
2122 * in the first word of a lck_rw_t
2123 */
2124 fake_lck.data = prior_lock_state;
2125
2126 if (fake_lck.shared_count <= 1) {
2127 if (fake_lck.w_waiting) {
2128 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
2129 }
2130
2131 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
2132 thread_wakeup(LCK_RW_READER_EVENT(lck));
2133 }
2134 }
2135 if (fake_lck.shared_count) {
2136 lock_type = LCK_RW_TYPE_SHARED;
2137 } else {
2138 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2139 }
2140
2141 /* Check if dropping the lock means that we need to unpromote */
2142 thread = current_thread();
2143 if (fake_lck.can_sleep) {
2144 lck_rw_lock_count_dec(thread, lck);
2145 }
2146
2147 #if CONFIG_DTRACE
2148 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2149 #endif
2150
2151 #ifdef DEBUG_RW
2152 remove_held_rwlock(lck, thread, lock_type);
2153 #endif /* DEBUG_RW */
2154 return lock_type;
2155 }
2156
2157 /*!
2158 * @function lck_rw_done
2159 *
2160 * @abstract
2161 * Force unlocks a rw_lock without consistency checks.
2162 *
2163 * @discussion
2164 * Do not use unless sure you can avoid consistency checks.
2165 *
2166 * @param lock rw_lock to unlock.
2167 */
2168 lck_rw_type_t
2169 lck_rw_done(
2170 lck_rw_t *lock)
2171 {
2172 uint32_t data, prev;
2173 boolean_t once = FALSE;
2174
2175 #ifdef DEBUG_RW
2176 /*
2177 * Best effort attempt to check that this thread
2178 * is holding the lock.
2179 */
2180 thread_t thread = current_thread();
2181 assert_held_rwlock(lock, thread, 0);
2182 #endif /* DEBUG_RW */
2183 for (;;) {
2184 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
2185 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
2186 atomic_exchange_abort();
2187 lck_rw_interlock_spin(lock);
2188 continue;
2189 }
2190 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
2191 assertf(lock->lck_rw_owner == 0,
2192 "state=0x%x, owner=%p", lock->lck_rw_data,
2193 ctid_get_thread_unsafe(lock->lck_rw_owner));
2194 data -= LCK_RW_SHARED_READER;
2195 if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
2196 goto check_waiters;
2197 }
2198 } else { /* if reader count == 0, must be exclusive lock */
2199 if (data & LCK_RW_WANT_UPGRADE) {
2200 data &= ~(LCK_RW_WANT_UPGRADE);
2201 } else {
2202 if (data & LCK_RW_WANT_EXCL) {
2203 data &= ~(LCK_RW_WANT_EXCL);
2204 } else { /* lock is not 'owned', panic */
2205 panic("Releasing non-exclusive RW lock without a reader refcount!");
2206 }
2207 }
2208 if (!once) {
2209 // Only check for holder and clear it once
2210 assertf(lock->lck_rw_owner == current_thread()->ctid,
2211 "state=0x%x, owner=%p", lock->lck_rw_data,
2212 ctid_get_thread_unsafe(lock->lck_rw_owner));
2213 ordered_store_rw_owner(lock, 0);
2214 once = TRUE;
2215 }
2216 check_waiters:
2217 /*
2218 * test the original values to match what
2219 * lck_rw_done_gen is going to do to determine
2220 * which wakeups need to happen...
2221 *
2222 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
2223 */
2224 if (prev & LCK_RW_W_WAITING) {
2225 data &= ~(LCK_RW_W_WAITING);
2226 if ((prev & LCK_RW_PRIV_EXCL) == 0) {
2227 data &= ~(LCK_RW_R_WAITING);
2228 }
2229 } else {
2230 data &= ~(LCK_RW_R_WAITING);
2231 }
2232 }
2233 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
2234 break;
2235 }
2236 cpu_pause();
2237 }
2238 return lck_rw_done_gen(lock, prev);
2239 }
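
/*
 * Illustrative sketch (obj is hypothetical): lck_rw_done() releases the lock
 * whichever way it is currently held and reports that mode, which is
 * convenient in paths shared by readers and the writer:
 *
 *	lck_rw_type_t mode = lck_rw_done(&obj->lock);
 *	if (mode == LCK_RW_TYPE_EXCLUSIVE) {
 *		...
 *	}
 */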
2240
2241 /*!
2242 * @function lck_rw_unlock_shared
2243 *
2244 * @abstract
2245 * Unlocks a rw_lock previously locked in shared mode.
2246 *
2247 * @discussion
2248 * The same thread that locked the lock needs to unlock it.
2249 *
2250 * @param lck rw_lock held in shared mode to unlock.
2251 */
2252 void
2253 lck_rw_unlock_shared(
2254 lck_rw_t *lck)
2255 {
2256 lck_rw_type_t ret;
2257
2258 assertf(lck->lck_rw_owner == 0,
2259 "state=0x%x, owner=%p", lck->lck_rw_data,
2260 ctid_get_thread_unsafe(lck->lck_rw_owner));
2261 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
2262 ret = lck_rw_done(lck);
2263
2264 if (ret != LCK_RW_TYPE_SHARED) {
2265 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
2266 }
2267 }
2268
2269 /*!
2270 * @function lck_rw_unlock_exclusive
2271 *
2272 * @abstract
2273 * Unlocks a rw_lock previously locked in exclusive mode.
2274 *
2275 * @discussion
2276 * The same thread that locked the lock needs to unlock it.
2277 *
2278 * @param lck rw_lock held in exclusive mode to unlock.
2279 */
2280 void
2281 lck_rw_unlock_exclusive(
2282 lck_rw_t *lck)
2283 {
2284 lck_rw_type_t ret;
2285
2286 assertf(lck->lck_rw_owner == current_thread()->ctid,
2287 "state=0x%x, owner=%p", lck->lck_rw_data,
2288 ctid_get_thread_unsafe(lck->lck_rw_owner));
2289 ret = lck_rw_done(lck);
2290
2291 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
2292 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
2293 }
2294 }
2295
2296 /*!
2297 * @function lck_rw_unlock
2298 *
2299 * @abstract
2300 * Unlocks a rw_lock previously locked with lck_rw_type.
2301 *
2302 * @discussion
2303 * The lock must be unlocked by the same thread it was locked from.
2304 * The lock and unlock types have to match, unless an upgrade/downgrade was performed while
2305 * holding the lock.
2306 *
2307 * @param lck rw_lock to unlock.
2308 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2309 */
2310 void
2311 lck_rw_unlock(
2312 lck_rw_t *lck,
2313 lck_rw_type_t lck_rw_type)
2314 {
2315 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2316 lck_rw_unlock_shared(lck);
2317 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2318 lck_rw_unlock_exclusive(lck);
2319 } else {
2320 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
2321 }
2322 }
2323
2324 /*!
2325 * @function lck_rw_assert
2326 *
2327 * @abstract
2328 * Asserts the rw_lock is held.
2329 *
2330 * @discussion
2331 * Read-write locks do not have a concept of ownership when held in shared mode,
2332 * so this function merely asserts that someone is holding the lock, not necessarily the caller.
2333 * However, if rw_lock_debug is on, a best-effort mechanism to track the owners is in place, and
2334 * this function can be more accurate.
2335 * Type can be LCK_RW_ASSERT_SHARED, LCK_RW_ASSERT_EXCLUSIVE, LCK_RW_ASSERT_HELD or
2336 * LCK_RW_ASSERT_NOTHELD.
2337 *
2338 * @param lck rw_lock to check.
2339 * @param type assert type
2340 */
2341 void
2342 lck_rw_assert(
2343 lck_rw_t *lck,
2344 unsigned int type)
2345 {
2346 thread_t thread = current_thread();
2347
2348 switch (type) {
2349 case LCK_RW_ASSERT_SHARED:
2350 if ((lck->lck_rw_shared_count != 0) &&
2351 (lck->lck_rw_owner == 0)) {
2352 #if DEBUG_RW
2353 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2354 #endif /* DEBUG_RW */
2355 return;
2356 }
2357 break;
2358 case LCK_RW_ASSERT_EXCLUSIVE:
2359 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2360 (lck->lck_rw_shared_count == 0) &&
2361 (lck->lck_rw_owner == thread->ctid)) {
2362 #if DEBUG_RW
2363 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2364 #endif /* DEBUG_RW */
2365 return;
2366 }
2367 break;
2368 case LCK_RW_ASSERT_HELD:
2369 if (lck->lck_rw_shared_count != 0) {
2370 #if DEBUG_RW
2371 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2372 #endif /* DEBUG_RW */
2373 return; // Held shared
2374 }
2375 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2376 (lck->lck_rw_owner == thread->ctid)) {
2377 #if DEBUG_RW
2378 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2379 #endif /* DEBUG_RW */
2380 return; // Held exclusive
2381 }
2382 break;
2383 case LCK_RW_ASSERT_NOTHELD:
2384 if ((lck->lck_rw_shared_count == 0) &&
2385 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2386 (lck->lck_rw_owner == 0)) {
2387 #ifdef DEBUG_RW
2388 assert_canlock_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2389 #endif /* DEBUG_RW */
2390 return;
2391 }
2392 break;
2393 default:
2394 break;
2395 }
2396 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2397 }
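
/*
 * Illustrative sketch (obj_t and obj_update_locked() are hypothetical):
 * asserting at the top of a "_locked" helper makes its locking contract
 * explicit and catches callers that enter in the wrong mode:
 *
 *	static void
 *	obj_update_locked(obj_t *obj)
 *	{
 *		lck_rw_assert(&obj->lock, LCK_RW_ASSERT_EXCLUSIVE);
 *		obj->generation++;
 *	}
 */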
2398
2399 /*!
2400 * @function kdp_lck_rw_lock_is_acquired_exclusive
2401 *
2402 * @abstract
2403 * Checks if a rw_lock is held exclusively.
2404 *
2405 * @discussion
2406 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2407 *
2408 * @param lck lock to check
2409 *
2410 * @returns TRUE if the lock is held exclusively
2411 */
2412 boolean_t
2413 kdp_lck_rw_lock_is_acquired_exclusive(
2414 lck_rw_t *lck)
2415 {
2416 if (not_in_kdp) {
2417 panic("panic: rw lock exclusive check done outside of kernel debugger");
2418 }
2419 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2420 }
2421
2422 void
2423 kdp_rwlck_find_owner(
2424 __unused struct waitq *waitq,
2425 event64_t event,
2426 thread_waitinfo_t *waitinfo)
2427 {
2428 lck_rw_t *rwlck = NULL;
2429 switch (waitinfo->wait_type) {
2430 case kThreadWaitKernelRWLockRead:
2431 rwlck = READ_EVENT_TO_RWLOCK(event);
2432 break;
2433 case kThreadWaitKernelRWLockWrite:
2434 case kThreadWaitKernelRWLockUpgrade:
2435 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2436 break;
2437 default:
2438 panic("%s was called with an invalid blocking type", __FUNCTION__);
2439 break;
2440 }
2441 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2442 waitinfo->owner = thread_tid(ctid_get_thread(rwlck->lck_rw_owner));
2443 }
2444
2445 /*!
2446 * @function lck_rw_lock_yield_shared
2447 *
2448 * @abstract
2449 * Yields a rw_lock held in shared mode.
2450 *
2451 * @discussion
2452 * This function can block.
2453 * Yields the lock in case there are writers waiting.
2454 * The yield will unlock, block, and re-lock the lock in shared mode.
2455 *
2456 * @param lck rw_lock already held in shared mode to yield.
2457 * @param force_yield if set to true it will always yield irrespective of the lock status
2458 *
2459 * @returns TRUE if the lock was yielded, FALSE otherwise
2460 */
2461 bool
2462 lck_rw_lock_yield_shared(
2463 lck_rw_t *lck,
2464 boolean_t force_yield)
2465 {
2466 lck_rw_word_t word;
2467
2468 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2469
2470 word.data = ordered_load_rw(lck);
2471 if (word.want_excl || word.want_upgrade || force_yield) {
2472 lck_rw_unlock_shared(lck);
2473 mutex_pause(2);
2474 lck_rw_lock_shared(lck);
2475 return true;
2476 }
2477
2478 return false;
2479 }
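
/*
 * Illustrative sketch (table and visit() are hypothetical): long shared-mode
 * scans should yield periodically so writers are not starved. A yield that
 * returns true has dropped and re-taken the lock, so the loop below relies
 * on re-reading table->count each iteration to stay within bounds:
 *
 *	lck_rw_lock_shared(&table->lock);
 *	for (i = 0; i < table->count; i++) {
 *		visit(table, i);
 *		(void)lck_rw_lock_yield_shared(&table->lock, FALSE);
 *	}
 *	lck_rw_unlock_shared(&table->lock);
 */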
2480
2481 /*!
2482 * @function lck_rw_lock_yield_exclusive
2483 *
2484 * @abstract
2485 * Yields a rw_lock held in exclusive mode.
2486 *
2487 * @discussion
2488 * This function can block.
2489 * Yields the lock if there are waiters, depending on the requested mode.
2490 * The yield will unlock, block, and re-lock the lock in exclusive mode.
2491 *
2492 * @param lck rw_lock already held in exclusive mode to yield.
2493 * @param mode when to yield.
2494 *
2495 * @returns TRUE if the lock was yielded, FALSE otherwise
2496 */
2497 bool
2498 lck_rw_lock_yield_exclusive(
2499 lck_rw_t *lck,
2500 lck_rw_yield_t mode)
2501 {
2502 lck_rw_word_t word;
2503 bool yield = false;
2504
2505 lck_rw_assert(lck, LCK_RW_ASSERT_EXCLUSIVE);
2506
2507 if (mode == LCK_RW_YIELD_ALWAYS) {
2508 yield = true;
2509 } else {
2510 word.data = ordered_load_rw(lck);
2511 if (word.w_waiting) {
2512 yield = true;
2513 } else if (mode == LCK_RW_YIELD_ANY_WAITER) {
2514 yield = (word.r_waiting != 0);
2515 }
2516 }
2517
2518 if (yield) {
2519 lck_rw_unlock_exclusive(lck);
2520 mutex_pause(2);
2521 lck_rw_lock_exclusive(lck);
2522 }
2523
2524 return yield;
2525 }
2526
2527 /*!
2528 * @function lck_rw_sleep
2529 *
2530 * @abstract
2531 * Assert_wait on an event while holding the rw_lock.
2532 *
2533 * @discussion
2534 * The flags decide how to re-acquire the lock upon wake up
2535 * (LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
2536 * and whether the priority needs to be kept boosted until the lock is
2537 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2538 *
2539 * @param lck rw_lock to use to synch the assert_wait.
2540 * @param lck_sleep_action flags.
2541 * @param event event to assert_wait on.
2542 * @param interruptible wait type.
2543 */
2544 wait_result_t
2545 lck_rw_sleep(
2546 lck_rw_t *lck,
2547 lck_sleep_action_t lck_sleep_action,
2548 event_t event,
2549 wait_interrupt_t interruptible)
2550 {
2551 wait_result_t res;
2552 lck_rw_type_t lck_rw_type;
2553 thread_pri_floor_t token;
2554
2555 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2556 panic("Invalid lock sleep action %x", lck_sleep_action);
2557 }
2558
2559 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2560 /*
2561 * Although we are dropping the RW lock, the intent in most cases
2562 * is that this thread remains as an observer, since it may hold
2563 * some secondary resource, but must yield to avoid deadlock. In
2564 * this situation, make sure that the thread is boosted to the
2565 * ceiling while blocked, so that it can re-acquire the
2566 * RW lock at that priority.
2567 */
2568 token = thread_priority_floor_start();
2569 }
2570
2571 res = assert_wait(event, interruptible);
2572 if (res == THREAD_WAITING) {
2573 lck_rw_type = lck_rw_done(lck);
2574 res = thread_block(THREAD_CONTINUE_NULL);
2575 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2576 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2577 lck_rw_lock(lck, lck_rw_type);
2578 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2579 lck_rw_lock_exclusive(lck);
2580 } else {
2581 lck_rw_lock_shared(lck);
2582 }
2583 }
2584 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2585 (void)lck_rw_done(lck);
2586 }
2587
2588 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2589 thread_priority_floor_end(&token);
2590 }
2591
2592 return res;
2593 }
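
/*
 * Illustrative sketch (obj and its ready flag are hypothetical): the usual
 * shape is a condition re-checked in a loop, because the lock is dropped
 * while the thread is blocked and the state may have changed by the time
 * the lock is re-acquired:
 *
 *	lck_rw_lock_exclusive(&obj->lock);
 *	while (!obj->ready) {
 *		lck_rw_sleep(&obj->lock, LCK_SLEEP_EXCLUSIVE,
 *		    &obj->ready, THREAD_UNINT);
 *	}
 *	consume(obj);
 *	lck_rw_unlock_exclusive(&obj->lock);
 *
 * The waker sets obj->ready under the lock and calls
 * thread_wakeup(&obj->ready).
 */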
2594
2595 /*!
2596 * @function lck_rw_sleep_deadline
2597 *
2598 * @abstract
2599 * Assert_wait_deadline on an event while holding the rw_lock.
2600 *
2601 * @discussion
2602 * The flags decide how to re-acquire the lock upon wake up
2603 * (LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
2604 * and whether the priority needs to be kept boosted until the lock is
2605 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2606 *
2607 * @param lck rw_lock to use to synch the assert_wait.
2608 * @param lck_sleep_action flags.
2609 * @param event event to assert_wait on.
2610 * @param interruptible wait type.
2611 * @param deadline time by which the thread will be woken up if the event has not occurred
2612 */
2613 wait_result_t
2614 lck_rw_sleep_deadline(
2615 lck_rw_t *lck,
2616 lck_sleep_action_t lck_sleep_action,
2617 event_t event,
2618 wait_interrupt_t interruptible,
2619 uint64_t deadline)
2620 {
2621 wait_result_t res;
2622 lck_rw_type_t lck_rw_type;
2623 thread_pri_floor_t token;
2624
2625 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2626 panic("Invalid lock sleep action %x", lck_sleep_action);
2627 }
2628
2629 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2630 token = thread_priority_floor_start();
2631 }
2632
2633 res = assert_wait_deadline(event, interruptible, deadline);
2634 if (res == THREAD_WAITING) {
2635 lck_rw_type = lck_rw_done(lck);
2636 res = thread_block(THREAD_CONTINUE_NULL);
2637 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2638 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2639 lck_rw_lock(lck, lck_rw_type);
2640 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2641 lck_rw_lock_exclusive(lck);
2642 } else {
2643 lck_rw_lock_shared(lck);
2644 }
2645 }
2646 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2647 (void)lck_rw_done(lck);
2648 }
2649
2650 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2651 thread_priority_floor_end(&token);
2652 }
2653
2654 return res;
2655 }
2656
2657 /*
2658 * Reader-writer lock promotion
2659 *
2660 * We support a limited form of reader-writer
2661 * lock promotion whose effects are:
2662 *
2663 * * Qualifying threads have decay disabled
2664 * * Scheduler priority is reset to a floor of
2665 * their statically assigned priority
2666 * or MINPRI_RWLOCK
2667 *
2668 * The rationale is that lck_rw_ts do not have
2669 * a single owner, so we cannot apply a directed
2670 * priority boost from all waiting threads
2671 * to all holding threads without maintaining
2672 * lists of all shared owners and all waiting
2673 * threads for every lock.
2674 *
2675 * Instead (and to preserve the uncontended fast-
2676 * path), acquiring (or attempting to acquire)
2677 * a RW lock in shared or exclusive lock increments
2678 * a per-thread counter. Only if that thread stops
2679 * making forward progress (for instance blocking
2680 * on a mutex, or being preempted) do we consult
2681 * the counter and apply the priority floor.
2682 * When the thread becomes runnable again (or in
2683 * the case of preemption it never stopped being
2684 * runnable), it has the priority boost and should
2685 * be in a good position to run on the CPU and
2686 * release all RW locks (at which point the priority
2687 * boost is cleared).
2688 *
2689 * Care must be taken to ensure that priority
2690 * boosts are not retained indefinitely, since unlike
2691 * mutex priority boosts (where the boost is tied
2692 * to the mutex lifecycle), the boost is tied
2693 * to the thread and independent of any particular
2694 * lck_rw_t. Assertions are in place on return
2695 * to userspace so that the boost is not held
2696 * indefinitely.
2697 *
2698 * The routines that increment/decrement the
2699 * per-thread counter should err on the side of
2700 * incrementing any time a preemption is possible
2701 * and the lock would be visible to the rest of the
2702 * system as held (so it should be incremented before
2703 * interlocks are dropped/preemption is enabled, or
2704 * before a CAS is executed to acquire the lock).
2705 *
2706 */
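
/*
 * Schematically (see lck_rw_lock_count_inc()/lck_rw_lock_count_dec() below),
 * every hold is bracketed by the per-thread counter, and only the transition
 * back to zero can clear a promotion:
 *
 *	lck_rw_lock_count_inc(thread, lock);
 *	...	lock held; if the thread blocks or is preempted here, the
 *		scheduler applies the TH_SFLAG_RW_PROMOTED floor
 *	lck_rw_lock_count_dec(thread, lock);
 *
 * lck_rw_lock_count_dec() calls lck_rw_clear_promotion() on the 1 -> 0
 * transition if the promotion was applied.
 */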
2707
2708 /*!
2709 * @function lck_rw_clear_promotion
2710 *
2711 * @abstract
2712 * Undo priority promotions when the last rw_lock
2713 * is released by a thread (if a promotion was active).
2714 *
2715 * @param thread thread to demote.
2716 * @param lock object reason for the demotion.
2717 */
2718 __attribute__((noinline))
2719 static void
2720 lck_rw_clear_promotion(thread_t thread, const void *lock)
2721 {
2722 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
2723 spl_t s = splsched();
2724 thread_lock(thread);
2725
2726 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
2727 sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED,
2728 unslide_for_kdebug(lock));
2729 }
2730
2731 thread_unlock(thread);
2732 splx(s);
2733 }
2734
2735 /*!
2736 * @function lck_rw_set_promotion_locked
2737 *
2738 * @abstract
2739 * Callout from context switch if the thread goes
2740 * off core with a positive rwlock_count.
2741 *
2742 * @discussion
2743 * Called at splsched with the thread locked.
2744 *
2745 * @param thread thread to promote.
2746 */
2747 __attribute__((always_inline))
2748 void
2749 lck_rw_set_promotion_locked(thread_t thread)
2750 {
2751 if (LcksOpts & disLkRWPrio) {
2752 return;
2753 }
2754
2755 assert(thread->rwlock_count > 0);
2756
2757 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2758 sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
2759 }
2760 }
2761
2762 __attribute__((always_inline))
2763 void
2764 lck_rw_lock_count_inc(thread_t thread, const void *lock __unused)
2765 {
2766 if (thread->rwlock_count++ == 0) {
2767 #if MACH_ASSERT
2768 /*
2769 * Set the ast to check that the
2770 * rwlock_count is going to be set to zero when
2771 * going back to userspace.
2772 * Set it only once when we increment it for the first time.
2773 */
2774 act_set_debug_assert();
2775 #endif
2776 }
2777 }
2778
2779 __abortlike
2780 static void
2781 __lck_rw_lock_count_dec_panic(thread_t thread)
2782 {
2783 panic("rw lock count underflow for thread %p", thread);
2784 }
2785
2786 __attribute__((always_inline))
2787 void
2788 lck_rw_lock_count_dec(thread_t thread, const void *lock)
2789 {
2790 uint32_t rwlock_count = thread->rwlock_count--;
2791
2792 if (rwlock_count == 0) {
2793 __lck_rw_lock_count_dec_panic(thread);
2794 }
2795
2796 if (__probable(rwlock_count == 1)) {
2797 /* sched_flags checked without lock, but will be rechecked while clearing */
2798 if (__improbable(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2799 lck_rw_clear_promotion(thread, lock);
2800 }
2801 }
2802 }
2803