1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <kern/work_interval.h>
37 #include <mach/task_policy.h>
38 #include <kern/sfi.h>
39 #include <kern/policy_internal.h>
40 #include <sys/errno.h>
41 #include <sys/ulock.h>
42
43 #include <mach/machine/sdt.h>
44
/* Typed allocation zone for struct thread_qos_override records. */
static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
    struct thread_qos_override, KT_DEFAULT);
47
48 #ifdef MACH_BSD
49 extern int proc_selfpid(void);
50 extern char * proc_name_address(void *p);
51 extern void rethrottle_thread(void * uthread);
52 #endif /* MACH_BSD */
53
/* Mask off the low byte of an encoded QoS tier constant. */
#define QOS_EXTRACT(q) ((q) & 0xff)

/* Possible values for the qos_override_mode tunable below. */
#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3

/* Selects the QoS override tracking strategy; settable via "qos_override_mode" boot-arg. */
TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
    QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
63
64 static void
65 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
66
/* Default kevent IO tier override: THROTTLE_LEVEL_END, i.e. no override in effect. */
const int thread_default_iotier_override = THROTTLE_LEVEL_END;

/* A freshly initialized thread requests no policy except the inert kevent iotier override. */
const struct thread_requested_policy default_thread_requested_policy = {
	.thrp_iotier_kevent_override = thread_default_iotier_override
};
72
73 /*
74 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
75 * to threads that don't have a QoS class set.
76 */
const qos_policy_params_t thread_qos_policy_params = {
	/*
	 * This table defines the starting base priority of the thread,
	 * which will be modified by the thread importance and the task max priority
	 * before being applied.
	 */
	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,

	/*
	 * This table defines the highest IO priority that a thread marked with this
	 * QoS class can have.
	 */
	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,

	/*
	 * This table defines the highest throughput QoS tier that
	 * a thread marked with this QoS class can have.
	 */

	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),

	/* Latency QoS tier assigned to each thread QoS class. */
	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};
124
125 static void
126 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
127
128 static int
129 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
130
131 static void
132 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
133
134 static void
135 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
136
137 static void
138 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
139
140 static void
141 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
142
143 static int
144 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
145
146 static int
147 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
148
149 static void
150 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
151
152 static void
153 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
154
155 boolean_t
thread_has_qos_policy(thread_t thread)156 thread_has_qos_policy(thread_t thread)
157 {
158 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160
161
/*
 * Strip the thread's QoS policy, resetting it to THREAD_QOS_UNSPECIFIED
 * with zero relative priority.
 *
 * Called with the thread mutex held; the caller must run
 * thread_policy_update_complete_unlocked() with the filled-in
 * pend_token after dropping the mutex.
 */
static void
thread_remove_qos_policy_locked(thread_t thread,
    task_pend_token_t pend_token)
{
	/* Captured only for the DTrace probe below. */
	__unused int prev_qos = thread->requested_policy.thrp_qos;

	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);

	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
}
173
174 kern_return_t
thread_remove_qos_policy(thread_t thread)175 thread_remove_qos_policy(thread_t thread)
176 {
177 struct task_pend_token pend_token = {};
178
179 thread_mtx_lock(thread);
180 if (!thread->active) {
181 thread_mtx_unlock(thread);
182 return KERN_TERMINATED;
183 }
184
185 thread_remove_qos_policy_locked(thread, &pend_token);
186
187 thread_mtx_unlock(thread);
188
189 thread_policy_update_complete_unlocked(thread, &pend_token);
190
191 return KERN_SUCCESS;
192 }
193
194
195 boolean_t
thread_is_static_param(thread_t thread)196 thread_is_static_param(thread_t thread)
197 {
198 if (thread->static_param) {
199 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 return TRUE;
201 }
202 return FALSE;
203 }
204
205 /*
206 * Relative priorities can range between 0REL and -15REL. These
207 * map to QoS-specific ranges, to create non-overlapping priority
208 * ranges.
209 */
210 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 int next_lower_qos;
214
215 /* Fast path, since no validation or scaling is needed */
216 if (qos_relprio == 0) {
217 return 0;
218 }
219
220 switch (qos) {
221 case THREAD_QOS_USER_INTERACTIVE:
222 next_lower_qos = THREAD_QOS_USER_INITIATED;
223 break;
224 case THREAD_QOS_USER_INITIATED:
225 next_lower_qos = THREAD_QOS_LEGACY;
226 break;
227 case THREAD_QOS_LEGACY:
228 next_lower_qos = THREAD_QOS_UTILITY;
229 break;
230 case THREAD_QOS_UTILITY:
231 next_lower_qos = THREAD_QOS_BACKGROUND;
232 break;
233 case THREAD_QOS_MAINTENANCE:
234 case THREAD_QOS_BACKGROUND:
235 next_lower_qos = 0;
236 break;
237 default:
238 panic("Unrecognized QoS %d", qos);
239 return 0;
240 }
241
242 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244
245 /*
246 * We now have the valid range that the scaled relative priority can map to. Note
247 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 * remainder.
251 */
252 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253
254 return scaled_relprio;
255 }
256
257 /*
258 * flag set by -qos-policy-allow boot-arg to allow
259 * testing thread qos policy from userspace
260 */
261 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
262
/*
 * User-facing entry point for applying a thread policy flavor.
 *
 * Unless the -qos-policy-allow boot-arg is set, this rejects requests on
 * static-param threads, rejects THREAD_QOS_POLICY outright, and restricts
 * THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY to priority == BASEPRI_RTQUEUES.
 * An existing QoS tier is removed before the new policy is applied and
 * restored if the apply fails.
 *
 * Returns KERN_INVALID_ARGUMENT / KERN_POLICY_STATIC / KERN_INVALID_POLICY
 * on validation failure, otherwise the result of thread_policy_set_internal().
 */
kern_return_t
thread_policy_set(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			/* Only the base realtime priority is permitted without the boot-arg. */
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);
		if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
			/* Fail requests to become realtime for threads having joined workintervals
			 * with workload ID that don't have the rt-allowed flag. */
			return KERN_INVALID_POLICY;
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
333
334 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
335 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
336 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
337 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
338
/*
 * Apply a thread policy flavor to a thread, with no boot-arg gating and
 * without first stripping an existing QoS policy (callers such as
 * thread_policy_set() handle that).
 *
 * Takes the thread mutex; the scheduler-visible updates within each
 * flavor are performed under the thread spinlock at splsched.  Returns
 * KERN_TERMINATED for inactive threads and KERN_INVALID_ARGUMENT /
 * KERN_INVALID_POLICY / KERN_NOT_SUPPORTED / KERN_PROTECTION_FAILURE
 * for bad requests.
 */
kern_return_t
thread_policy_set_internal(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* A short or absent info structure is treated as requesting timeshare. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		/*
		 * If the thread has previously requested realtime but is
		 * demoted with RT_RESTRICTED, undemote the thread before
		 * applying the new user sched mode. This prevents the thread
		 * being stuck at TIMESHARE or being made realtime unexpectedly
		 * (when undemoted).
		 */
		if ((thread->sched_flags & TH_SFLAG_RT_RESTRICTED) != 0) {
			sched_thread_mode_undemote(thread, TH_SFLAG_RT_RESTRICTED);
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		/* The computation must fit inside the constraint and the RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Raise the computation to at least half the constraint, capped at max_rt_quantum. */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			/* The explicit priority must fall within the realtime band. */
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period = info->period;
		thread->realtime.computation = info->computation;
		thread->realtime.constraint = info->constraint;
		thread->realtime.preemptible = info->preemptible;

		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
		} else if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    (th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_CRITICAL)) {
			/* N.B. that criticality/realtime priority offset is currently not adjusted when the
			 * thread leaves the work interval, or only joins it after already having become realtime */
			thread->realtime.priority_offset = 1;
		} else {
			thread->realtime.priority_offset = 0;
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		/* Importance directly biases the thread's base priority. */
		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		extern boolean_t affinity_sets_enabled;
		thread_affinity_policy_t info;

		if (!affinity_sets_enabled) {
			result = KERN_INVALID_POLICY;
			break;
		}

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only a task may background its own threads. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-requests count as internal policy, requests on other threads as external. */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Relative priority must be in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* A relative priority is meaningless without a QoS tier to anchor it. */
		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Note: relpri is stored negated (as a positive offset). */
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	/* Perform any deferred updates (e.g. SFI reevaluation) outside the locks. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
637
638 /*
639 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
640 * Both result in FIXED mode scheduling.
641 */
642 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)643 convert_policy_to_sched_mode(integer_t policy)
644 {
645 switch (policy) {
646 case POLICY_TIMESHARE:
647 return TH_MODE_TIMESHARE;
648 case POLICY_RR:
649 case POLICY_FIFO:
650 return TH_MODE_FIXED;
651 default:
652 panic("unexpected sched policy: %d", policy);
653 return TH_MODE_NONE;
654 }
655 }
656
657 /*
658 * Called either with the thread mutex locked
659 * or from the pthread kext in a 'safe place'.
660 */
661 static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t thread,sched_mode_t mode,integer_t priority,task_pend_token_t pend_token)662 thread_set_mode_and_absolute_pri_internal(thread_t thread,
663 sched_mode_t mode,
664 integer_t priority,
665 task_pend_token_t pend_token)
666 {
667 kern_return_t kr = KERN_SUCCESS;
668
669 spl_t s = splsched();
670 thread_lock(thread);
671
672 /* This path isn't allowed to change a thread out of realtime. */
673 if ((thread->sched_mode == TH_MODE_REALTIME) ||
674 (thread->saved_mode == TH_MODE_REALTIME)) {
675 kr = KERN_FAILURE;
676 goto unlock;
677 }
678
679 if (thread->policy_reset) {
680 kr = KERN_SUCCESS;
681 goto unlock;
682 }
683
684 sched_mode_t old_mode = thread->sched_mode;
685
686 /*
687 * Reverse engineer and apply the correct importance value
688 * from the requested absolute priority value.
689 *
690 * TODO: Store the absolute priority value instead
691 */
692
693 if (priority >= thread->max_priority) {
694 priority = thread->max_priority - thread->task_priority;
695 } else if (priority >= MINPRI_KERNEL) {
696 priority -= MINPRI_KERNEL;
697 } else if (priority >= MINPRI_RESERVED) {
698 priority -= MINPRI_RESERVED;
699 } else {
700 priority -= BASEPRI_DEFAULT;
701 }
702
703 priority += thread->task_priority;
704
705 if (priority > thread->max_priority) {
706 priority = thread->max_priority;
707 } else if (priority < MINPRI) {
708 priority = MINPRI;
709 }
710
711 thread->importance = priority - thread->task_priority;
712
713 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
714
715 if (mode != old_mode) {
716 pend_token->tpt_update_thread_sfi = 1;
717 }
718
719 unlock:
720 thread_unlock(thread);
721 splx(s);
722
723 return kr;
724 }
725
/*
 * Freeze the current thread's base priority by setting
 * TH_SFLAG_BASE_PRI_FROZEN.  While frozen, requested base-pri changes are
 * expected to accumulate in req_base_pri and are applied later by
 * thread_unfreeze_base_pri().  Must not be called when already frozen.
 */
void
thread_freeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());

	spl_t s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;

	thread_unlock(thread);
	splx(s);
}
740
/*
 * Unfreeze the current thread's base priority and apply any deferred
 * requested base priority (req_base_pri).  Returns true if applying the
 * deferred priority triggered a pending AST_PREEMPT, i.e. the base-pri
 * change is the most likely cause for an imminent preemption.
 */
bool
thread_unfreeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());
	integer_t base_pri;
	ast_t ast = 0;

	spl_t s = splsched();
	thread_lock(thread);

	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;

	base_pri = thread->req_base_pri;
	if (base_pri != thread->base_pri) {
		/*
		 * This function returns "true" if the base pri change
		 * is the most likely cause for the preemption.
		 */
		sched_set_thread_base_priority(thread, base_pri);
		ast = ast_peek(AST_PREEMPT);
	}

	thread_unlock(thread);
	splx(s);

	return ast != 0;
}
769
770 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)771 thread_workq_pri_for_qos(thread_qos_t qos)
772 {
773 assert(qos < THREAD_QOS_LAST);
774 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
775 }
776
777 thread_qos_t
thread_workq_qos_for_pri(int priority)778 thread_workq_qos_for_pri(int priority)
779 {
780 thread_qos_t qos;
781 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
782 // indicate that workq should map >UI threads to workq's
783 // internal notation for above-UI work.
784 return THREAD_QOS_UNSPECIFIED;
785 }
786 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
787 // map a given priority up to the next nearest qos band.
788 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
789 return qos;
790 }
791 }
792 return THREAD_QOS_MAINTENANCE;
793 }
794
795 /*
796 * private interface for pthread workqueues
797 *
798 * Set scheduling policy & absolute priority for thread
799 * May be called with spinlocks held
800 * Thread mutex lock is not held
801 */
802 void
thread_reset_workq_qos(thread_t thread,uint32_t qos)803 thread_reset_workq_qos(thread_t thread, uint32_t qos)
804 {
805 struct task_pend_token pend_token = {};
806
807 assert(qos < THREAD_QOS_LAST);
808
809 spl_t s = splsched();
810 thread_lock(thread);
811
812 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
813 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
814 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
815 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
816 &pend_token);
817
818 assert(pend_token.tpt_update_sockets == 0);
819
820 thread_unlock(thread);
821 splx(s);
822
823 thread_policy_update_complete_unlocked(thread, &pend_token);
824 }
825
826 /*
827 * private interface for pthread workqueues
828 *
829 * Set scheduling policy & absolute priority for thread
830 * May be called with spinlocks held
831 * Thread mutex lock is held
832 */
833 void
thread_set_workq_override(thread_t thread,uint32_t qos)834 thread_set_workq_override(thread_t thread, uint32_t qos)
835 {
836 struct task_pend_token pend_token = {};
837
838 assert(qos < THREAD_QOS_LAST);
839
840 spl_t s = splsched();
841 thread_lock(thread);
842
843 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
844 TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
845
846 assert(pend_token.tpt_update_sockets == 0);
847
848 thread_unlock(thread);
849 splx(s);
850
851 thread_policy_update_complete_unlocked(thread, &pend_token);
852 }
853
854 /*
855 * private interface for pthread workqueues
856 *
857 * Set scheduling policy & absolute priority for thread
858 * May be called with spinlocks held
859 * Thread mutex lock is not held
860 */
861 void
thread_set_workq_pri(thread_t thread,thread_qos_t qos,integer_t priority,integer_t policy)862 thread_set_workq_pri(thread_t thread,
863 thread_qos_t qos,
864 integer_t priority,
865 integer_t policy)
866 {
867 struct task_pend_token pend_token = {};
868 sched_mode_t mode = convert_policy_to_sched_mode(policy);
869
870 assert(qos < THREAD_QOS_LAST);
871 assert(thread->static_param);
872
873 if (!thread->static_param || !thread->active) {
874 return;
875 }
876
877 spl_t s = splsched();
878 thread_lock(thread);
879
880 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
881 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
882 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
883 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
884 0, &pend_token);
885
886 thread_unlock(thread);
887 splx(s);
888
889 /* Concern: this doesn't hold the mutex... */
890
891 __assert_only kern_return_t kr;
892 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
893 &pend_token);
894 assert(kr == KERN_SUCCESS);
895
896 if (pend_token.tpt_update_thread_sfi) {
897 sfi_reevaluate(thread);
898 }
899 }
900
901 /*
902 * thread_set_mode_and_absolute_pri:
903 *
904 * Set scheduling policy & absolute priority for thread, for deprecated
905 * thread_set_policy and thread_policy interfaces.
906 *
907 * Called with nothing locked.
908 */
909 kern_return_t
thread_set_mode_and_absolute_pri(thread_t thread,integer_t policy,integer_t priority)910 thread_set_mode_and_absolute_pri(thread_t thread,
911 integer_t policy,
912 integer_t priority)
913 {
914 kern_return_t kr = KERN_SUCCESS;
915 struct task_pend_token pend_token = {};
916
917 sched_mode_t mode = convert_policy_to_sched_mode(policy);
918
919 thread_mtx_lock(thread);
920
921 if (!thread->active) {
922 kr = KERN_TERMINATED;
923 goto unlock;
924 }
925
926 if (thread_is_static_param(thread)) {
927 kr = KERN_POLICY_STATIC;
928 goto unlock;
929 }
930
931 /* Setting legacy policies on threads kills the current QoS */
932 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
933 thread_remove_qos_policy_locked(thread, &pend_token);
934 }
935
936 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
937
938 unlock:
939 thread_mtx_unlock(thread);
940
941 thread_policy_update_complete_unlocked(thread, &pend_token);
942
943 return kr;
944 }
945
946 /*
947 * Set the thread's requested mode and recompute priority
948 * Called with thread mutex and thread locked
949 *
950 * TODO: Mitigate potential problems caused by moving thread to end of runq
951 * whenever its priority is recomputed
952 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
953 */
954 static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread,sched_mode_t mode)955 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
956 {
957 if (thread->policy_reset) {
958 return;
959 }
960
961 boolean_t removed = thread_run_queue_remove(thread);
962
963 sched_set_thread_mode_user(thread, mode);
964
965 thread_recompute_priority(thread);
966
967 if (removed) {
968 thread_run_queue_reinsert(thread, SCHED_TAILQ);
969 }
970 }
971
/*
 * Charge the thread's CPU time accrued since the last snapshot to the
 * task's per-QoS CPU time statistics, bucketed once by the thread's
 * effective QoS and once by its requested QoS.
 *
 * called at splsched with thread lock locked
 */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate the thread's last context switch or user/kernel
	 * transition (unless precise user/kernel time is disabled).
	 *
	 * TODO: Consider running an update operation here to update it first.
	 *       Maybe doable with interrupts disabled from current thread.
	 *       If the thread is on a different core, may not be easy to get right.
	 */

	/* Only the delta since the previous snapshot is charged. */
	timer_sum = recount_thread_time_mach(thread);
	timer_delta = timer_sum - thread->vtimer_qos_save;

	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:  task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:  task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:      task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:       task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:  task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:  task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:      task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:       task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}
1025
1026 /*
1027 * called with no thread locks held
1028 * may hold task lock
1029 */
void
thread_update_qos_cpu_time(thread_t thread)
{
	/*
	 * Acquire the locks the _locked variant requires: thread mutex first,
	 * then interrupts disabled and the thread spinlock (this nesting order
	 * must not be changed).
	 */
	thread_mtx_lock(thread);

	spl_t s = splsched();
	thread_lock(thread);

	thread_update_qos_cpu_time_locked(thread);

	/* Release in strict reverse order of acquisition. */
	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1045
1046 /*
1047 * Calculate base priority from thread attributes, and set it on the thread
1048 *
1049 * Called with thread_lock and thread mutex held.
1050 */
1051 extern boolean_t vps_dynamic_priority_enabled;
1052
1053 void
thread_recompute_priority(thread_t thread)1054 thread_recompute_priority(
1055 thread_t thread)
1056 {
1057 integer_t priority;
1058 integer_t adj_priority;
1059
1060 if (thread->policy_reset) {
1061 return;
1062 }
1063
1064 if (thread->sched_mode == TH_MODE_REALTIME) {
1065 uint8_t i = thread->realtime.priority_offset;
1066 assert((i >= 0) && (i < NRTQS));
1067 priority = BASEPRI_RTQUEUES + i;
1068 sched_set_thread_base_priority(thread, priority);
1069 if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1070 /* Make sure the thread has a valid deadline */
1071 uint64_t ctime = mach_absolute_time();
1072 thread->realtime.deadline = thread->realtime.constraint + ctime;
1073 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1074 (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1075 }
1076 return;
1077 } else if (thread->effective_policy.thep_wi_driven) {
1078 priority = work_interval_get_priority(thread);
1079 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1080 int qos = thread->effective_policy.thep_qos;
1081 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1082 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1083 int qos_scaled_relprio;
1084
1085 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1086 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1087
1088 priority = thread_qos_policy_params.qos_pri[qos];
1089 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1090
1091 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1092 /* Bump priority 46 to 47 when in a frontmost app */
1093 qos_scaled_relprio += 1;
1094 }
1095
1096 /* TODO: factor in renice priority here? */
1097
1098 priority += qos_scaled_relprio;
1099 } else {
1100 if (thread->importance > MAXPRI) {
1101 priority = MAXPRI;
1102 } else if (thread->importance < -MAXPRI) {
1103 priority = -MAXPRI;
1104 } else {
1105 priority = thread->importance;
1106 }
1107
1108 priority += thread->task_priority;
1109 }
1110
1111 priority = MAX(priority, thread->user_promotion_basepri);
1112
1113 /*
1114 * Clamp priority back into the allowed range for this task.
1115 * The initial priority value could be out of this range due to:
1116 * Task clamped to BG or Utility (max-pri is 4, or 20)
1117 * Task is user task (max-pri is 63)
1118 * Task is kernel task (max-pri is 95)
1119 * Note that thread->importance is user-settable to any integer
1120 * via THREAD_PRECEDENCE_POLICY.
1121 */
1122 adj_priority = priority;
1123 adj_priority = MIN(adj_priority, thread->max_priority);
1124 adj_priority = MAX(adj_priority, MINPRI);
1125
1126 /* Allow workload driven priorities to exceed max_priority. */
1127 if (thread->effective_policy.thep_wi_driven) {
1128 adj_priority = MAX(adj_priority, priority);
1129 }
1130
1131 /* Allow priority to exceed max_priority for promotions. */
1132 if (thread->effective_policy.thep_promote_above_task) {
1133 adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
1134 }
1135 priority = adj_priority;
1136 assert3u(priority, <=, MAXPRI);
1137
1138 if (thread->saved_mode == TH_MODE_REALTIME &&
1139 thread->sched_flags & TH_SFLAG_FAILSAFE) {
1140 priority = DEPRESSPRI;
1141 }
1142
1143 if (thread->effective_policy.thep_terminated == TRUE) {
1144 /*
1145 * We temporarily want to override the expected priority to
1146 * ensure that the thread exits in a timely manner.
1147 * Note that this is allowed to exceed thread->max_priority
1148 * so that the thread is no longer clamped to background
1149 * during the final exit phase.
1150 */
1151 if (priority < thread->task_priority) {
1152 priority = thread->task_priority;
1153 }
1154 if (priority < BASEPRI_DEFAULT) {
1155 priority = BASEPRI_DEFAULT;
1156 }
1157 }
1158
1159 #if !defined(XNU_TARGET_OS_OSX)
1160 /* No one can have a base priority less than MAXPRI_THROTTLE */
1161 if (priority < MAXPRI_THROTTLE) {
1162 priority = MAXPRI_THROTTLE;
1163 }
1164 #endif /* !defined(XNU_TARGET_OS_OSX) */
1165
1166 sched_set_thread_base_priority(thread, priority);
1167 }
1168
1169 /* Called with the task lock held, but not the thread mutex or spinlock */
1170 void
thread_policy_update_tasklocked(thread_t thread,integer_t priority,integer_t max_priority,task_pend_token_t pend_token)1171 thread_policy_update_tasklocked(
1172 thread_t thread,
1173 integer_t priority,
1174 integer_t max_priority,
1175 task_pend_token_t pend_token)
1176 {
1177 thread_mtx_lock(thread);
1178
1179 if (!thread->active || thread->policy_reset) {
1180 thread_mtx_unlock(thread);
1181 return;
1182 }
1183
1184 spl_t s = splsched();
1185 thread_lock(thread);
1186
1187 __unused
1188 integer_t old_max_priority = thread->max_priority;
1189
1190 assert(priority >= INT16_MIN && priority <= INT16_MAX);
1191 thread->task_priority = (int16_t)priority;
1192
1193 assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
1194 thread->max_priority = (int16_t)max_priority;
1195
1196 /*
1197 * When backgrounding a thread, realtime and fixed priority threads
1198 * should be demoted to timeshare background threads.
1199 *
1200 * TODO: Do this inside the thread policy update routine in order to avoid double
1201 * remove/reinsert for a runnable thread
1202 */
1203 if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1204 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1205 } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1206 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1207 }
1208
1209 thread_policy_update_spinlocked(thread, true, pend_token);
1210
1211 thread_unlock(thread);
1212 splx(s);
1213
1214 thread_mtx_unlock(thread);
1215 }
1216
1217 /*
1218 * Reset thread to default state in preparation for termination
1219 * Called with thread mutex locked
1220 *
1221 * Always called on current thread, so we don't need a run queue remove
1222 */
void
thread_policy_reset(
	thread_t thread)
{
	spl_t s;

	/* Only the exiting thread itself may reset its policy. */
	assert(thread == current_thread());

	s = splsched();
	thread_lock(thread);

	/* Undo each outstanding demotion so the mode/priority state is clean. */
	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
	}

	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	if (thread->sched_flags & TH_SFLAG_RT_RESTRICTED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_RT_RESTRICTED);
	}

	/* At this point, the various demotions should be inactive */
	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));

	/* Reset thread back to task-default basepri and mode */
	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));

	sched_set_thread_mode(thread, newmode);

	thread->importance = 0;

	/* Prevent further changes to thread base priority or mode */
	thread->policy_reset = 1;

	sched_set_thread_base_priority(thread, thread->task_priority);

	thread_unlock(thread);
	splx(s);
}
1265
/*
 * Return the current (or default) policy settings for `flavor` on `thread`.
 * On entry *get_default selects default values; it may also be set to TRUE
 * on return when the thread has no applicable setting for the flavor.
 * *count is the caller-supplied capacity of policy_info in natural_t units.
 */
kern_return_t
thread_policy_get(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  *count,
	boolean_t               *get_default)
{
	kern_return_t result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/*
			 * For a demoted thread, report the saved (pre-demotion)
			 * mode rather than its current effective mode.
			 */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/* The WITH_PRIORITY flavor returns one extra field. */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/* saved_mode covers realtime threads currently demoted. */
			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Raw bit images of the policy structs, for debugging. */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_REQUESTED_STATE_POLICY:
	{
		if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * NOTE(review): unlike the other flavors, this reads
		 * requested_policy under only the thread mutex (no splsched/
		 * thread spinlock) — presumably the mutex suffices for the
		 * requested fields; verify against the setters.
		 */
		thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
		struct thread_requested_policy *req_policy = &thread->requested_policy;

		info->thrq_base_qos = req_policy->thrp_qos;
		info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
		info->thrq_qos_override = req_policy->thrp_qos_override;
		info->thrq_qos_promote = req_policy->thrp_qos_promote;
		info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
		info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
		info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		/* Convert the internal value back to the user-visible tier. */
		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* relprio is stored inverted; flip it back for the caller. */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1560
/*
 * Run the initial effective-policy computation for a newly created thread,
 * bracketed by importance trace events.
 */
void
thread_policy_create(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	/* We pass a pend token but ignore it */
	struct task_pend_token pend_token = {};

	thread_policy_update_internal_spinlocked(thread, true, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1579
/*
 * Tracing wrapper around thread_policy_update_internal_spinlocked().
 * Called with the thread spinlock held.
 */
static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1595
1596
1597
1598 /*
1599 * One thread state update function TO RULE THEM ALL
1600 *
1601 * This function updates the thread effective policy fields
1602 * and pushes the results to the relevant subsystems.
1603 *
1604 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1605 */
static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
    task_pend_token_t pend_token)
{
	/*
	 * Step 1:
	 *  Gather requested policy and effective task state
	 */

	const struct thread_requested_policy requested = thread->requested_policy;
	const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;

	/*
	 * Step 2:
	 *  Calculate new effective policies from requested policy, task and thread state
	 *  Rules:
	 *      Don't change requested, it won't take effect
	 */

	struct thread_effective_policy next = {};

	next.thep_wi_driven = requested.thrp_wi_driven;

	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;

	uint32_t next_qos = requested.thrp_qos;

	/* Overrides/promotions can only raise QoS, and only if a base QoS is set. */
	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		next_qos = MAX(requested.thrp_qos_override, next_qos);
		next_qos = MAX(requested.thrp_qos_promote, next_qos);
		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
	}

	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
		/*
		 * This thread is turnstile-boosted higher than the adaptive clamp
		 * by a synchronous waiter. Allow that to override the adaptive
		 * clamp temporarily for this thread only.
		 */
		next.thep_promote_above_task = true;
		next_qos = requested.thrp_qos_promote;
	}

	next.thep_qos = next_qos;

	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
		} else {
			next.thep_qos = task_effective.tep_qos_clamp;
		}
		/* A QoS clamp supersedes work-interval-driven priority. */
		next.thep_wi_driven = 0;
	}

	/*
	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
	 * This allows QoS promotions to work properly even after the process is unclamped.
	 */
	next.thep_qos_promote = next.thep_qos;

	/* The ceiling only applies to threads that are in the QoS world */
	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
	}

	/*
	 * The QoS relative priority is only applicable when the original programmer's
	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
	 * since otherwise it would be lower than unclamped threads. Similarly, in the
	 * presence of boosting, the programmer doesn't know what other actors
	 * are boosting the thread.
	 */
	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
	    (requested.thrp_qos == next.thep_qos) &&
	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos_relprio = requested.thrp_qos_relprio;
	} else {
		next.thep_qos_relprio = 0;
	}

	/* Calculate DARWIN_BG */
	bool wants_darwinbg = false;
	bool wants_all_sockets_bg = false; /* Do I want my existing sockets to be bg */

	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
		wants_darwinbg = true;
	}

	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * darwinbg threads always create bg sockets,
	 * but only some types of darwinbg change the sockets
	 * after they're created
	 */
	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	if (requested.thrp_pidbind_bg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	/* BG and maintenance QoS imply darwinbg as well. */
	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
		wants_darwinbg = true;
	}

	/* Calculate side effects of DARWIN_BG */

	if (wants_darwinbg) {
		next.thep_darwinbg = 1;
		next.thep_wi_driven = 0;
	}

	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
		next.thep_new_sockets_bg = 1;
	}

	/* Don't use task_effective.tep_all_sockets_bg here */
	if (wants_all_sockets_bg) {
		next.thep_all_sockets_bg = 1;
	}

	/* darwinbg implies background QOS (or lower) */
	if (next.thep_darwinbg &&
	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos = THREAD_QOS_BACKGROUND;
		next.thep_qos_relprio = 0;
	}

	/* Calculate IO policy */

	int iopol = THROTTLE_LEVEL_TIER0;

	/* Factor in the task's IO policy */
	if (next.thep_darwinbg) {
		iopol = MAX(iopol, task_effective.tep_bg_iotier);
	}

	if (!next.thep_promote_above_task) {
		iopol = MAX(iopol, task_effective.tep_io_tier);
	}

	/* Look up the associated IO tier value for the QoS class */
	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);

	iopol = MAX(iopol, requested.thrp_int_iotier);
	iopol = MAX(iopol, requested.thrp_ext_iotier);

	/* Apply the kevent iotier override */
	iopol = MIN(iopol, requested.thrp_iotier_kevent_override);

	next.thep_io_tier = iopol;

	/*
	 * If a QoS override is causing IO to go into a lower tier, we also set
	 * the passive bit so that a thread doesn't end up stuck in its own throttle
	 * window when the override goes away.
	 */

	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);

	/* Calculate Passive IO policy */
	if (requested.thrp_ext_iopassive ||
	    requested.thrp_int_iopassive ||
	    qos_io_override_active ||
	    task_effective.tep_io_passive) {
		next.thep_io_passive = 1;
	}

	/* Calculate timer QOS */
	uint32_t latency_qos = requested.thrp_latency_qos;

	if (!next.thep_promote_above_task) {
		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
	}

	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);

	next.thep_latency_qos = latency_qos;

	/* Calculate throughput QOS */
	uint32_t through_qos = requested.thrp_through_qos;

	if (!next.thep_promote_above_task) {
		through_qos = MAX(through_qos, task_effective.tep_through_qos);
	}

	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);

	next.thep_through_qos = through_qos;

	if (task_effective.tep_terminated || requested.thrp_terminated) {
		/* Shoot down the throttles that slow down exit or response to SIGTERM */
		next.thep_terminated = 1;
		next.thep_darwinbg = 0;
		next.thep_io_tier = THROTTLE_LEVEL_TIER0;
		next.thep_qos = THREAD_QOS_UNSPECIFIED;
		next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
		next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
		next.thep_wi_driven = 0;
	}

	/*
	 * Step 3:
	 *  Swap out old policy for new policy
	 */

	struct thread_effective_policy prev = thread->effective_policy;

	/* Settle CPU-time accounting against the outgoing QoS before switching. */
	thread_update_qos_cpu_time_locked(thread);

	/* This is the point where the new values become visible to other threads */
	thread->effective_policy = next;

	/*
	 * Step 4:
	 *  Pend updates that can't be done while holding the thread lock
	 */

	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
		pend_token->tpt_update_sockets = 1;
	}

	/* TODO: Doesn't this only need to be done if the throttle went up? */
	if (prev.thep_io_tier != next.thep_io_tier) {
		pend_token->tpt_update_throttle = 1;
	}

	/*
	 * Check for the attributes that sfi_thread_classify() consults,
	 * and trigger SFI re-evaluation.
	 */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_darwinbg != next.thep_darwinbg) {
		pend_token->tpt_update_thread_sfi = 1;
	}

	integer_t old_base_pri = thread->base_pri;

	/*
	 * Step 5:
	 *  Update other subsystems as necessary if something has changed
	 */

	/* Check for the attributes that thread_recompute_priority() consults */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_qos_relprio != next.thep_qos_relprio ||
	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
	    prev.thep_promote_above_task != next.thep_promote_above_task ||
	    prev.thep_terminated != next.thep_terminated ||
	    prev.thep_wi_driven != next.thep_wi_driven ||
	    pend_token->tpt_force_recompute_pri == 1 ||
	    recompute_priority) {
		thread_recompute_priority(thread);
	}

	/*
	 * Check if the thread is waiting on a turnstile and needs priority propagation.
	 */
	if (pend_token->tpt_update_turnstile &&
	    ((old_base_pri == thread->base_pri) ||
	    !thread_get_waiting_turnstile(thread))) {
		/*
		 * Reset update turnstile pend token since either
		 * the thread priority did not change or thread is
		 * not blocked on a turnstile.
		 */
		pend_token->tpt_update_turnstile = 0;
	}
}
1886
1887
1888 /*
1889 * Initiate a thread policy state transition on a thread with its TID
1890 * Useful if you cannot guarantee the thread won't get terminated
1891 * Precondition: No locks are held
1892 * Will take task lock - using the non-tid variant is faster
1893 * if you already have a thread ref.
1894 */
1895 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1896 proc_set_thread_policy_with_tid(task_t task,
1897 uint64_t tid,
1898 int category,
1899 int flavor,
1900 int value)
1901 {
1902 /* takes task lock, returns ref'ed thread or NULL */
1903 thread_t thread = task_findtid(task, tid);
1904
1905 if (thread == THREAD_NULL) {
1906 return;
1907 }
1908
1909 proc_set_thread_policy(thread, category, flavor, value);
1910
1911 thread_deallocate(thread);
1912 }
1913
1914 /*
1915 * Initiate a thread policy transition on a thread
1916 * This path supports networking transitions (i.e. darwinbg transitions)
1917 * Precondition: No locks are held
1918 */
void
proc_set_thread_policy(thread_t thread,
    int category,
    int flavor,
    int value)
{
	/* Collects side effects that must run after all locks are dropped. */
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);

	proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);

	thread_mtx_unlock(thread);

	/* Must run unlocked: may rethrottle, re-evaluate SFI, or touch sockets. */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
1935
1936 /*
1937 * Do the things that can't be done while holding a thread mutex.
1938 * These are set up to call back into thread policy to get the latest value,
1939 * so they don't have to be synchronized with the update.
1940 * The only required semantic is 'call this sometime after updating effective policy'
1941 *
1942 * Precondition: Thread mutex is not held
1943 *
1944 * This may be called with the task lock held, but in that case it won't be
1945 * called with tpt_update_sockets set.
1946 */
void
thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
	/* Push the all-sockets-bg change down to the BSD networking layer. */
	if (pend_token->tpt_update_sockets) {
		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
	}
#endif /* MACH_BSD */

	/* Re-evaluate the thread's IO throttle window. */
	if (pend_token->tpt_update_throttle) {
		rethrottle_thread(get_bsdthread_info(thread));
	}

	/* Re-classify the thread for selective forced idle. */
	if (pend_token->tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}

	/* Propagate the new priority through the turnstile chain. */
	if (pend_token->tpt_update_turnstile) {
		turnstile_update_thread_priority_chain(thread);
	}
}
1968
1969 /*
1970 * Set and update thread policy
1971 * Thread mutex might be held
1972 */
static void
proc_set_thread_policy_locked(thread_t thread,
    int category,
    int flavor,
    int value,
    int value2,
    task_pend_token_t pend_token)
{
	/* Requested/effective policy state is protected by the thread spinlock */
	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	thread_unlock(thread);
	splx(s);
}
1989
1990 /*
1991 * Set and update thread policy
1992 * Thread spinlock is held
1993 */
static void
proc_set_thread_policy_spinlocked(thread_t thread,
    int category,
    int flavor,
    int value,
    int value2,
    task_pend_token_t pend_token)
{
	/* Trace the requested-policy words before the update */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	/* Recompute effective policy from the freshly stored requested policy */
	thread_policy_update_spinlocked(thread, false, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
2016
2017 /*
2018 * Set the requested state for a specific flavor to a specific value.
2019 */
static void
thread_set_requested_policy_spinlocked(thread_t thread,
    int category,
    int flavor,
    int value,
    int value2,
    task_pend_token_t pend_token)
{
	int tier, passive;

	/* Work on a local copy; the single write-back below keeps the update whole */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_darwinbg = value;
		} else {
			requested.thrp_int_darwinbg = value;
		}
		break;

	case TASK_POLICY_IOPOL:
		/* Translate an IOPOL_* value into a throttle tier + passive flag */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = tier;
			requested.thrp_ext_iopassive = passive;
		} else {
			requested.thrp_int_iotier = tier;
			requested.thrp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = value;
		} else {
			requested.thrp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iopassive = value;
		} else {
			requested.thrp_int_iopassive = value;
		}
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_pidbind_bg = value;
		break;

	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_latency_qos = value;
		break;

	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_through_qos = value;
		break;

	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_override = value;
		/* QoS changes can affect turnstile-propagated priorities */
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_AND_RELPRIO:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos = value;
		requested.thrp_qos_relprio = value2;
		pend_token->tpt_update_turnstile = 1;
		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
		break;

	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_workq_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_promote = value;
		break;

	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_kevent_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_wlsvc_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_terminated = value;
		break;

	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_iotier_kevent_override = value;
		break;

	case TASK_POLICY_WI_DRIVEN:
		/* Work-interval driven scheduling mode; only legal on the current thread */
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(thread == current_thread());

		if (value != TH_MODE_NONE) {
			thread->static_param = true;
			sched_set_thread_mode_user(thread, value);
			requested.thrp_wi_driven = 1;
		} else {
			/* Clearing WI-driven mode reverts the thread to timeshare */
			sched_set_thread_mode_user(thread, TH_MODE_TIMESHARE);
			requested.thrp_wi_driven = 0;
		}
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	thread->requested_policy = requested;
}
2155
2156 /*
2157 * Gets what you set. Effective values may be different.
2158 * Precondition: No locks are held
2159 */
int
proc_get_thread_policy(thread_t thread,
    int category,
    int flavor)
{
	int value = 0;
	thread_mtx_lock(thread);
	/* NULL: caller does not want a secondary value (value2) */
	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
	thread_mtx_unlock(thread);
	return value;
}
2171
static int
proc_get_thread_policy_locked(thread_t thread,
    int category,
    int flavor,
    int* value2)
{
	int value = 0;

	/* Requested policy is read under the thread spinlock */
	spl_t s = splsched();
	thread_lock(thread);

	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);

	thread_unlock(thread);
	splx(s);

	return value;
}
2190
2191 /*
2192 * Gets what you set. Effective values may be different.
2193 */
static int
thread_get_requested_policy_spinlocked(thread_t thread,
    int category,
    int flavor,
    int* value2)
{
	int value = 0;

	/* Local snapshot of the requested (not effective) policy */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_darwinbg;
		} else {
			value = requested.thrp_int_darwinbg;
		}
		break;
	case TASK_POLICY_IOPOL:
		/* Convert tier + passive flag back to an IOPOL_* value */
		if (category == TASK_POLICY_EXTERNAL) {
			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
			    requested.thrp_ext_iopassive);
		} else {
			value = proc_tier_to_iopol(requested.thrp_int_iotier,
			    requested.thrp_int_iopassive);
		}
		break;
	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iotier;
		} else {
			value = requested.thrp_int_iotier;
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iopassive;
		} else {
			value = requested.thrp_int_iopassive;
		}
		break;
	case TASK_POLICY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos;
		break;
	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_override;
		break;
	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_through_qos;
		break;
	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_workq_override;
		break;
	case TASK_POLICY_QOS_AND_RELPRIO:
		/* The only flavor with a secondary output: relative priority */
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(value2 != NULL);
		value = requested.thrp_qos;
		*value2 = requested.thrp_qos_relprio;
		break;
	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_promote;
		break;
	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_kevent_override;
		break;
	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_wlsvc_override;
		break;
	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_terminated;
		break;
	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_iotier_kevent_override;
		break;

	case TASK_POLICY_WI_DRIVEN:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_wi_driven;
		break;

	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	return value;
}
2294
2295 /*
2296 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2297 *
2298 * NOTE: This accessor does not take the task or thread lock.
2299 * Notifications of state updates need to be externally synchronized with state queries.
2300 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2301 * within the context of a timer interrupt.
2302 *
2303 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2304 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2305 * I don't think that cost is worth not having the right answer.
2306 */
int
proc_get_effective_thread_policy(thread_t thread,
    int flavor)
{
	int value = 0;

	/* Lockless reads of effective policy — see interrupt-safety note above */
	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This call is used within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * It also informs SFI and originator-bg-state.
		 * Returns 1 for background mode, 0 for normal mode
		 */

		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 */
		value = thread->effective_policy.thep_io_tier;
		/* A temporary iotier override can only make I/O more aggressive (lower tier) */
		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
			value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 *
		 * If an override is causing IO to go into a lower tier, we also set
		 * the passive bit so that a thread doesn't end up stuck in its own throttle
		 * window when the override goes away.
		 */
		value = thread->effective_policy.thep_io_passive ? 1 : 0;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
			value = 1;
		}
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine whether
		 * it should change the thread's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = (thread->effective_policy.thep_all_sockets_bg ||
		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a latency QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 *
		 * Returns a throughput QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_through_qos;
		break;
	case TASK_POLICY_QOS:
		/*
		 * This is communicated to the performance management layer and SFI.
		 *
		 * Returns a QoS policy tier
		 */
		value = thread->effective_policy.thep_qos;
		break;
	default:
		panic("unknown thread policy flavor %d", flavor);
		break;
	}

	return value;
}
2399
2400
2401 /*
2402 * (integer_t) casts limit the number of bits we can fit here
2403 * this interface is deprecated and replaced by the _EXT struct ?
2404 */
static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
{
	uint64_t bits = 0;
	struct thread_requested_policy requested = thread->requested_policy;

	/* Pack the requested policy into the legacy POLICY_REQ_* bit layout */
	bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
	bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
	bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);

	bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
	bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);

	bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);

	bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);

	/* (integer_t) narrowing: only the low bits fit this legacy interface */
	info->requested = (integer_t) bits;
	bits = 0;

	struct thread_effective_policy effective = thread->effective_policy;

	/* Same packing for the effective policy, POLICY_EFF_* layout */
	bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);

	bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
	bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
	bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
	bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);

	bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);

	bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
	bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);

	info->effective = (integer_t)bits;
	bits = 0;

	info->pending = 0;
}
2448
2449 /*
2450 * Sneakily trace either the task and thread requested
2451 * or just the thread requested, depending on if we have enough room.
2452 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2453 *
2454 * LP32 LP64
2455 * threquested_0(thread) thread[0] task[0]
2456 * threquested_1(thread) thread[1] thread[0]
2457 *
2458 */
2459
2460 uintptr_t
threquested_0(thread_t thread)2461 threquested_0(thread_t thread)
2462 {
2463 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2464
2465 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2466
2467 return raw[0];
2468 }
2469
2470 uintptr_t
threquested_1(thread_t thread)2471 threquested_1(thread_t thread)
2472 {
2473 #if defined __LP64__
2474 return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
2475 #else
2476 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2477 return raw[1];
2478 #endif
2479 }
2480
2481 uintptr_t
theffective_0(thread_t thread)2482 theffective_0(thread_t thread)
2483 {
2484 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2485
2486 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2487 return raw[0];
2488 }
2489
2490 uintptr_t
theffective_1(thread_t thread)2491 theffective_1(thread_t thread)
2492 {
2493 #if defined __LP64__
2494 return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
2495 #else
2496 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2497 return raw[1];
2498 #endif
2499 }
2500
2501
2502 /*
2503 * Set an override on the thread which is consulted with a
2504 * higher priority than the task/thread policy. This should
2505 * only be set for temporary grants until the thread
2506 * returns to the userspace boundary
2507 *
2508 * We use atomic operations to swap in the override, with
2509 * the assumption that the thread itself can
2510 * read the override and clear it on return to userspace.
2511 *
2512 * No locking is performed, since it is acceptable to see
2513 * a stale override for one loop through throttle_lowpri_io().
2514 * However a thread reference must be held on the thread.
2515 */
2516
void
set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		/* Lower tier number == more aggressive I/O */
		if (current_override != THROTTLE_LEVEL_NONE) {
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}
		/* CAS retries if another thread changed the override concurrently */
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2543
2544 /*
2545 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2546 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2547 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2548 * priority thread. In these cases, we attempt to propagate the priority token, as long
2549 * as the subsystem informs us of the relationships between the threads. The userspace
2550 * synchronization subsystem should maintain the information of owner->resource and
2551 * resource->waiters itself.
2552 */
2553
2554 /*
2555 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2556 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2557 * to be handled specially in the future, but for now it's fine to slam
2558 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2559 */
2560 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2561 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2562 {
2563 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2564 /* Map all input resource/type to a single one */
2565 *resource = USER_ADDR_NULL;
2566 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2567 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2568 /* no transform */
2569 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2570 /* Map all mutex overrides to a single one, to avoid memory overhead */
2571 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2572 *resource = USER_ADDR_NULL;
2573 }
2574 }
2575 }
2576
2577 /* This helper routine finds an existing override if known. Locking should be done by caller */
2578 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2579 find_qos_override(thread_t thread,
2580 user_addr_t resource,
2581 int resource_type)
2582 {
2583 struct thread_qos_override *override;
2584
2585 override = thread->overrides;
2586 while (override) {
2587 if (override->override_resource == resource &&
2588 override->override_resource_type == resource_type) {
2589 return override;
2590 }
2591
2592 override = override->override_next;
2593 }
2594
2595 return NULL;
2596 }
2597
/*
 * Decrement (or, with 'reset', zero) the contention count of every override
 * matching resource/resource_type.  Entries whose count reaches zero are
 * unlinked from the thread's override list and pushed onto
 * *free_override_list so the caller can zfree them after dropping the
 * thread mutex.  Wildcard resource/type match every entry.
 */
static void
find_and_decrement_qos_override(thread_t thread,
    user_addr_t resource,
    int resource_type,
    boolean_t reset,
    struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		struct thread_qos_override *override_next = override->override_next;

		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* Unlink from the singly-linked list */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			/* A non-wildcard resource matches at most one entry — done */
			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2644
2645 /* This helper recalculates the current requested override using the policy selected at boot */
2646 static int
calculate_requested_qos_override(thread_t thread)2647 calculate_requested_qos_override(thread_t thread)
2648 {
2649 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2650 return THREAD_QOS_UNSPECIFIED;
2651 }
2652
2653 /* iterate over all overrides and calculate MAX */
2654 struct thread_qos_override *override;
2655 int qos_override = THREAD_QOS_UNSPECIFIED;
2656
2657 override = thread->overrides;
2658 while (override) {
2659 qos_override = MAX(qos_override, override->override_qos);
2660 override = override->override_next;
2661 }
2662
2663 return qos_override;
2664 }
2665
2666 /*
2667 * Returns:
2668 * - 0 on success
2669 * - EINVAL if some invalid input was passed
2670 */
static int
proc_thread_qos_add_override_internal(thread_t thread,
    int override_qos,
    boolean_t first_override_for_resource,
    user_addr_t resource,
    int resource_type)
{
	struct task_pend_token pend_token = {};
	int rc = 0; /* no failure paths below — currently always returns 0 */

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos,
	    int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread mutex and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread mutex was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		/* Link the preallocated entry at the head of the override list */
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = (int16_t)resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	if (override) {
		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
			override->override_qos = (int16_t)override_qos;
		} else {
			/* An override's QoS only ever ratchets upwards */
			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
		}
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_OVERRIDE,
		    new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Free the preallocation if it went unused (someone else raced us) */
	if (override_new) {
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	    new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
2763
2764 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2765 proc_thread_qos_add_override(task_t task,
2766 thread_t thread,
2767 uint64_t tid,
2768 int override_qos,
2769 boolean_t first_override_for_resource,
2770 user_addr_t resource,
2771 int resource_type)
2772 {
2773 boolean_t has_thread_reference = FALSE;
2774 int rc = 0;
2775
2776 if (thread == THREAD_NULL) {
2777 thread = task_findtid(task, tid);
2778 /* returns referenced thread */
2779
2780 if (thread == THREAD_NULL) {
2781 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2782 tid, 0, 0xdead, 0, 0);
2783 return ESRCH;
2784 }
2785 has_thread_reference = TRUE;
2786 } else {
2787 assert(get_threadtask(thread) == task);
2788 }
2789 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2790 first_override_for_resource, resource, resource_type);
2791 if (has_thread_reference) {
2792 thread_deallocate(thread);
2793 }
2794
2795 return rc;
2796 }
2797
static void
proc_thread_qos_remove_override_internal(thread_t thread,
    user_addr_t resource,
    int resource_type,
    boolean_t reset)
{
	struct task_pend_token pend_token = {};

	/* Entries unlinked under the mutex; freed after all locks are dropped */
	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Now safe to free the overrides that dropped to zero contention */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2861
2862 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2863 proc_thread_qos_remove_override(task_t task,
2864 thread_t thread,
2865 uint64_t tid,
2866 user_addr_t resource,
2867 int resource_type)
2868 {
2869 boolean_t has_thread_reference = FALSE;
2870
2871 if (thread == THREAD_NULL) {
2872 thread = task_findtid(task, tid);
2873 /* returns referenced thread */
2874
2875 if (thread == THREAD_NULL) {
2876 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2877 tid, 0, 0xdead, 0, 0);
2878 return ESRCH;
2879 }
2880 has_thread_reference = TRUE;
2881 } else {
2882 assert(task == get_threadtask(thread));
2883 }
2884
2885 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2886
2887 if (has_thread_reference) {
2888 thread_deallocate(thread);
2889 }
2890
2891 return 0;
2892 }
2893
2894 /* Deallocate before thread termination */
2895 void
proc_thread_qos_deallocate(thread_t thread)2896 proc_thread_qos_deallocate(thread_t thread)
2897 {
2898 /* This thread must have no more IPC overrides. */
2899 assert(thread->kevent_overrides == 0);
2900 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2901 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2902
2903 /*
2904 * Clear out any lingering override objects.
2905 */
2906 struct thread_qos_override *override;
2907
2908 thread_mtx_lock(thread);
2909 override = thread->overrides;
2910 thread->overrides = NULL;
2911 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2912 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2913 thread_mtx_unlock(thread);
2914
2915 while (override) {
2916 struct thread_qos_override *override_next = override->override_next;
2917
2918 zfree(thread_qos_override_zone, override);
2919 override = override_next;
2920 }
2921 }
2922
2923 /*
2924 * Set up the primordial thread's QoS
2925 */
2926 void
task_set_main_thread_qos(task_t task,thread_t thread)2927 task_set_main_thread_qos(task_t task, thread_t thread)
2928 {
2929 struct task_pend_token pend_token = {};
2930
2931 assert(get_threadtask(thread) == task);
2932
2933 thread_mtx_lock(thread);
2934
2935 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2936 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2937 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2938 thread->requested_policy.thrp_qos, 0);
2939
2940 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2941
2942 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2943 primordial_qos, 0, &pend_token);
2944
2945 thread_mtx_unlock(thread);
2946
2947 thread_policy_update_complete_unlocked(thread, &pend_token);
2948
2949 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2950 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2951 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2952 primordial_qos, 0);
2953 }
2954
2955 /*
2956 * KPI for pthread kext
2957 *
2958 * Return a good guess at what the initial manager QoS will be
2959 * Dispatch can override this in userspace if it so chooses
2960 */
2961 thread_qos_t
task_get_default_manager_qos(task_t task)2962 task_get_default_manager_qos(task_t task)
2963 {
2964 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2965
2966 if (primordial_qos == THREAD_QOS_LEGACY) {
2967 primordial_qos = THREAD_QOS_USER_INITIATED;
2968 }
2969
2970 return primordial_qos;
2971 }
2972
2973 /*
2974 * Check if the kernel promotion on thread has changed
2975 * and apply it.
2976 *
2977 * thread locked on entry and exit
2978 */
2979 boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)2980 thread_recompute_kernel_promotion_locked(thread_t thread)
2981 {
2982 boolean_t needs_update = FALSE;
2983 uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
2984
2985 /*
2986 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
2987 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
2988 * and propagates the priority through the chain with the same cap, because as of now it does
2989 * not differenciate on the kernel primitive.
2990 *
2991 * If this assumption will change with the adoption of a kernel primitive that does not
2992 * cap the when adding/propagating,
2993 * then here is the place to put the generic cap for all kernel primitives
2994 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
2995 */
2996 assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
2997
2998 if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
2999 KDBG(MACHDBG_CODE(
3000 DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
3001 thread_tid(thread),
3002 kern_promotion_schedpri,
3003 thread->kern_promotion_schedpri);
3004
3005 needs_update = TRUE;
3006 thread->kern_promotion_schedpri = kern_promotion_schedpri;
3007 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
3008 }
3009
3010 return needs_update;
3011 }
3012
/*
 * Check if the user promotion on thread has changed
 * and apply it.
 *
 * thread locked on entry, might drop the thread lock
 * and reacquire it.
 */
boolean_t
thread_recompute_user_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	struct task_pend_token pend_token = {};
	/* New promotion floor from the turnstile inheritance chain, capped at MAXPRI_USER */
	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
	/* Snapshot base_pri so we can tell below whether the policy update moved it */
	int old_base_pri = thread->base_pri;
	thread_qos_t qos_promotion;

	/* Check if user promotion has changed */
	if (thread->user_promotion_basepri == user_promotion_basepri) {
		return needs_update;
	} else {
		/* Trace new vs. previous promotion base priority */
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri,
		    0, 0);
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri);
	}

	/* Update the user promotion base pri */
	thread->user_promotion_basepri = user_promotion_basepri;
	pend_token.tpt_force_recompute_pri = 1;

	/* Priorities at or below the throttle tier carry no QoS promotion */
	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
		qos_promotion = THREAD_QOS_UNSPECIFIED;
	} else {
		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);

	/*
	 * Caller only needs to propagate through the turnstile chain if the
	 * thread is waiting on one and its base priority actually changed.
	 */
	if (thread_get_waiting_turnstile(thread) &&
	    thread->base_pri != old_base_pri) {
		needs_update = TRUE;
	}

	/* Drop the thread lock to run the heavyweight completion, then retake it */
	thread_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	thread_lock(thread);

	return needs_update;
}
3072
3073 /*
3074 * Convert the thread user promotion base pri to qos for threads in qos world.
3075 * For priority above UI qos, the qos would be set to UI.
3076 */
3077 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)3078 thread_user_promotion_qos_for_pri(int priority)
3079 {
3080 thread_qos_t qos;
3081 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3082 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3083 return qos;
3084 }
3085 }
3086 return THREAD_QOS_MAINTENANCE;
3087 }
3088
3089 /*
3090 * Set the thread's QoS Kevent override
3091 * Owned by the Kevent subsystem
3092 *
3093 * May be called with spinlocks held, but not spinlocks
3094 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3095 *
3096 * One 'add' must be balanced by one 'drop'.
3097 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3098 * Before the thread is deallocated, there must be 0 remaining overrides.
3099 */
3100 static void
thread_kevent_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3101 thread_kevent_override(thread_t thread,
3102 uint32_t qos_override,
3103 boolean_t is_new_override)
3104 {
3105 struct task_pend_token pend_token = {};
3106 boolean_t needs_update;
3107
3108 spl_t s = splsched();
3109 thread_lock(thread);
3110
3111 uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
3112
3113 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3114 assert(qos_override < THREAD_QOS_LAST);
3115
3116 if (is_new_override) {
3117 if (thread->kevent_overrides++ == 0) {
3118 /* This add is the first override for this thread */
3119 assert(old_override == THREAD_QOS_UNSPECIFIED);
3120 } else {
3121 /* There are already other overrides in effect for this thread */
3122 assert(old_override > THREAD_QOS_UNSPECIFIED);
3123 }
3124 } else {
3125 /* There must be at least one override (the previous add call) in effect */
3126 assert(thread->kevent_overrides > 0);
3127 assert(old_override > THREAD_QOS_UNSPECIFIED);
3128 }
3129
3130 /*
3131 * We can't allow lowering if there are several IPC overrides because
3132 * the caller can't possibly know the whole truth
3133 */
3134 if (thread->kevent_overrides == 1) {
3135 needs_update = qos_override != old_override;
3136 } else {
3137 needs_update = qos_override > old_override;
3138 }
3139
3140 if (needs_update) {
3141 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3142 TASK_POLICY_QOS_KEVENT_OVERRIDE,
3143 qos_override, 0, &pend_token);
3144 assert(pend_token.tpt_update_sockets == 0);
3145 }
3146
3147 thread_unlock(thread);
3148 splx(s);
3149
3150 thread_policy_update_complete_unlocked(thread, &pend_token);
3151 }
3152
3153 void
thread_add_kevent_override(thread_t thread,uint32_t qos_override)3154 thread_add_kevent_override(thread_t thread, uint32_t qos_override)
3155 {
3156 thread_kevent_override(thread, qos_override, TRUE);
3157 }
3158
3159 void
thread_update_kevent_override(thread_t thread,uint32_t qos_override)3160 thread_update_kevent_override(thread_t thread, uint32_t qos_override)
3161 {
3162 thread_kevent_override(thread, qos_override, FALSE);
3163 }
3164
3165 void
thread_drop_kevent_override(thread_t thread)3166 thread_drop_kevent_override(thread_t thread)
3167 {
3168 struct task_pend_token pend_token = {};
3169
3170 spl_t s = splsched();
3171 thread_lock(thread);
3172
3173 assert(thread->kevent_overrides > 0);
3174
3175 if (--thread->kevent_overrides == 0) {
3176 /*
3177 * There are no more overrides for this thread, so we should
3178 * clear out the saturated override value
3179 */
3180
3181 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3182 TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3183 0, &pend_token);
3184 }
3185
3186 thread_unlock(thread);
3187 splx(s);
3188
3189 thread_policy_update_complete_unlocked(thread, &pend_token);
3190 }
3191
3192 /*
3193 * Set the thread's QoS Workloop Servicer override
3194 * Owned by the Kevent subsystem
3195 *
3196 * May be called with spinlocks held, but not spinlocks
3197 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3198 *
3199 * One 'add' must be balanced by one 'drop'.
3200 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3201 * Before the thread is deallocated, there must be 0 remaining overrides.
3202 */
3203 static void
thread_servicer_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3204 thread_servicer_override(thread_t thread,
3205 uint32_t qos_override,
3206 boolean_t is_new_override)
3207 {
3208 struct task_pend_token pend_token = {};
3209
3210 spl_t s = splsched();
3211 thread_lock(thread);
3212
3213 if (is_new_override) {
3214 assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3215 } else {
3216 assert(thread->requested_policy.thrp_qos_wlsvc_override);
3217 }
3218
3219 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3220 TASK_POLICY_QOS_SERVICER_OVERRIDE,
3221 qos_override, 0, &pend_token);
3222
3223 thread_unlock(thread);
3224 splx(s);
3225
3226 assert(pend_token.tpt_update_sockets == 0);
3227 thread_policy_update_complete_unlocked(thread, &pend_token);
3228 }
3229
3230 void
thread_add_servicer_override(thread_t thread,uint32_t qos_override)3231 thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3232 {
3233 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3234 assert(qos_override < THREAD_QOS_LAST);
3235
3236 thread_servicer_override(thread, qos_override, TRUE);
3237 }
3238
3239 void
thread_update_servicer_override(thread_t thread,uint32_t qos_override)3240 thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3241 {
3242 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3243 assert(qos_override < THREAD_QOS_LAST);
3244
3245 thread_servicer_override(thread, qos_override, FALSE);
3246 }
3247
3248 void
thread_drop_servicer_override(thread_t thread)3249 thread_drop_servicer_override(thread_t thread)
3250 {
3251 thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3252 }
3253
/*
 * Update the thread's kevent-owned IO tier override, skipping the policy
 * machinery entirely when the requested tier is already in effect.
 */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/* Check if the update is needed */
	/*
	 * NOTE(review): this read happens before splsched()/thread_lock(), so
	 * the check-then-set is racy — presumably benign because callers
	 * serialize iotier updates for a given servicer thread; confirm.
	 */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3281
3282 /* Get current requested qos / relpri, may be called from spinlock context */
3283 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3284 thread_get_requested_qos(thread_t thread, int *relpri)
3285 {
3286 int relprio_value = 0;
3287 thread_qos_t qos;
3288
3289 qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3290 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3291 if (relpri) {
3292 *relpri = -relprio_value;
3293 }
3294 return qos;
3295 }
3296
3297 /*
3298 * This function will promote the thread priority
3299 * since exec could block other threads calling
3300 * proc_find on the proc. This boost must be removed
3301 * via call to thread_clear_exec_promotion.
3302 *
3303 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3304 */
3305 void
thread_set_exec_promotion(thread_t thread)3306 thread_set_exec_promotion(thread_t thread)
3307 {
3308 spl_t s = splsched();
3309 thread_lock(thread);
3310
3311 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3312
3313 thread_unlock(thread);
3314 splx(s);
3315 }
3316
3317 /*
3318 * This function will clear the exec thread
3319 * promotion set on the thread by thread_set_exec_promotion.
3320 */
3321 void
thread_clear_exec_promotion(thread_t thread)3322 thread_clear_exec_promotion(thread_t thread)
3323 {
3324 spl_t s = splsched();
3325 thread_lock(thread);
3326
3327 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3328
3329 thread_unlock(thread);
3330 splx(s);
3331 }
3332
#if CONFIG_SCHED_RT_RESTRICT
/*
 * flag set by -time-constraint-policy-restrict boot-arg to restrict use of
 * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
 * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
 */
static TUNABLE(
	bool,
	restrict_time_constraint_policy,
	"-time-constraint-policy-restrict",
	false
	);

/*
 * Re-evaluate whether 'thread' may keep realtime scheduling, demoting or
 * undemoting it based on its current work-interval flags.  No-op unless
 * the restrict boot-arg is set, and only applies to user threads.
 */
void
thread_rt_evaluate(thread_t thread)
{
	/* If no restrictions are configured - nothing to do. */
	if (!restrict_time_constraint_policy) {
		return;
	}

	/* User threads only. */
	if (get_threadtask(thread) == kernel_task) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	const thread_work_interval_flags_t flags =
	    os_atomic_load(&thread->th_work_interval_flags, relaxed);

	/*
	 * RT threads are demoted if they are no longer joined to a work
	 * interval which has the RT_ALLOWED flag set (and not already demoted).
	 * saved_mode is also checked so a thread that is temporarily off
	 * TH_MODE_REALTIME still gets demoted.
	 */
	if (((thread->sched_flags & TH_SFLAG_RT_RESTRICTED) == 0) &&
	    ((flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) &&
	    (thread->sched_mode == TH_MODE_REALTIME || thread->saved_mode == TH_MODE_REALTIME)) {
		sched_thread_mode_demote(thread, TH_SFLAG_RT_RESTRICTED);
	}

	/*
	 * If demoted and joined to a work interval which allows RT threads,
	 * then undemote.
	 */
	if (((thread->sched_flags & TH_SFLAG_RT_RESTRICTED) != 0) &&
	    ((flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) != 0)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_RT_RESTRICTED);
	}

	thread_unlock(thread);
	splx(s);
}
#else

/* Stub when RT restriction support is compiled out. */
void
thread_rt_evaluate(__unused thread_t thread)
{
}
#endif /* CONFIG_SCHED_RT_RESTRICT */
3394