1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <kern/work_interval.h>
37 #include <mach/task_policy.h>
38 #include <kern/sfi.h>
39 #include <kern/policy_internal.h>
40 #include <sys/errno.h>
41 #include <sys/ulock.h>
42
43 #include <mach/machine/sdt.h>
44
45 static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
46 struct thread_qos_override, KT_DEFAULT);
47
48 #ifdef MACH_BSD
49 extern int proc_selfpid(void);
50 extern char * proc_name_address(void *p);
51 extern void rethrottle_thread(void * uthread);
52 #endif /* MACH_BSD */
53
54 #define QOS_EXTRACT(q) ((q) & 0xff)
55
56 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
57 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
58 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
59 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
60
61 TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
62 QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
63
64 static void
65 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
66
67 const int thread_default_iotier_override = THROTTLE_LEVEL_END;
68
69 const struct thread_requested_policy default_thread_requested_policy = {
70 .thrp_iotier_kevent_override = thread_default_iotier_override
71 };
72
73 /*
74 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
75 * to threads that don't have a QoS class set.
76 */
const qos_policy_params_t thread_qos_policy_params = {
	/*
	 * This table defines the starting base priority of the thread,
	 * which will be modified by the thread importance and the task max priority
	 * before being applied.
	 *
	 * NOTE(review): BASEPRI_BACKGROUND for USER_INTERACTIVE looks surprising,
	 * but the trailing comment asserts its value is 46 (just below foreground) —
	 * confirm against osfmk/kern/sched.h before changing.
	 */
	.qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
	.qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
	.qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
	.qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
	.qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
	.qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
	.qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,

	/*
	 * This table defines the highest IO priority that a thread marked with this
	 * QoS class can have.
	 */
	.qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
	.qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
	.qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,

	/*
	 * These two tables define the highest throughput-QoS and latency-QoS
	 * tier that a thread marked with this QoS class can have.
	 */

	.qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
	.qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
	.qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
	.qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),

	.qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
	.qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};
124
125 static void
126 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
127
128 static int
129 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
130
131 static void
132 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
133
134 static void
135 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
136
137 static void
138 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
139
140 static void
141 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
142
143 static int
144 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
145
146 static int
147 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
148
149 static void
150 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
151
152 static void
153 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
154
155 boolean_t
thread_has_qos_policy(thread_t thread)156 thread_has_qos_policy(thread_t thread)
157 {
158 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160
161
/*
 * Clear the thread's requested QoS tier and relative priority.
 *
 * Called with the thread mutex held; deferred policy effects are
 * accumulated into pend_token for the caller to flush after unlocking.
 */
static void
thread_remove_qos_policy_locked(thread_t thread,
    task_pend_token_t pend_token)
{
	/* Captured only for the DTrace probe below. */
	__unused int prev_qos = thread->requested_policy.thrp_qos;

	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);

	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
}
173
174 kern_return_t
thread_remove_qos_policy(thread_t thread)175 thread_remove_qos_policy(thread_t thread)
176 {
177 struct task_pend_token pend_token = {};
178
179 thread_mtx_lock(thread);
180 if (!thread->active) {
181 thread_mtx_unlock(thread);
182 return KERN_TERMINATED;
183 }
184
185 thread_remove_qos_policy_locked(thread, &pend_token);
186
187 thread_mtx_unlock(thread);
188
189 thread_policy_update_complete_unlocked(thread, &pend_token);
190
191 return KERN_SUCCESS;
192 }
193
194
195 boolean_t
thread_is_static_param(thread_t thread)196 thread_is_static_param(thread_t thread)
197 {
198 if (thread->static_param) {
199 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 return TRUE;
201 }
202 return FALSE;
203 }
204
205 /*
206 * Relative priorities can range between 0REL and -15REL. These
207 * map to QoS-specific ranges, to create non-overlapping priority
208 * ranges.
209 */
/*
 * Scale a relative priority (0REL..-15REL) into an absolute priority
 * offset within the band between this QoS class's base priority and
 * the next lower class's base priority, so that adjacent QoS classes
 * produce non-overlapping priority ranges.
 *
 * Returns a value <= 0 to be added to the class's base priority.
 */
static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio)
{
	int next_lower_qos;

	/* Fast path, since no validation or scaling is needed */
	if (qos_relprio == 0) {
		return 0;
	}

	switch (qos) {
	case THREAD_QOS_USER_INTERACTIVE:
		next_lower_qos = THREAD_QOS_USER_INITIATED;
		break;
	case THREAD_QOS_USER_INITIATED:
		next_lower_qos = THREAD_QOS_LEGACY;
		break;
	case THREAD_QOS_LEGACY:
		next_lower_qos = THREAD_QOS_UTILITY;
		break;
	case THREAD_QOS_UTILITY:
		next_lower_qos = THREAD_QOS_BACKGROUND;
		break;
	case THREAD_QOS_MAINTENANCE:
	case THREAD_QOS_BACKGROUND:
		/* Lowest classes scale against priority 0. */
		next_lower_qos = 0;
		break;
	default:
		panic("Unrecognized QoS %d", qos);
		return 0;
	}

	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;

	/*
	 * We now have the valid range that the scaled relative priority can map to. Note
	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
	 * remainder.
	 */
	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);

	return scaled_relprio;
}
256
257 /*
258 * flag set by -qos-policy-allow boot-arg to allow
259 * testing thread qos policy from userspace
260 */
261 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
262
/*
 * thread_policy_set:
 *
 * MIG entry point for setting a scheduling policy on a thread.
 *
 * Performs permission/validity checks that the internal variant skips,
 * then temporarily strips any existing QoS (saving it so it can be
 * restored if the underlying set fails) and delegates to
 * thread_policy_set_internal().
 */
kern_return_t
thread_policy_set(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Checks skipped when the -qos-policy-allow boot-arg is set (testing). */
	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		/* Userspace may not set QoS directly through this interface. */
		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			/* Only the base realtime priority is allowed without the boot-arg. */
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);
		if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
			/* Fail requests to become realtime for threads having joined workintervals
			 * with workload ID that don't have the rt-allowed flag. */
			return KERN_INVALID_POLICY;
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
333
334 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
335 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
336 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
337 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
338
/*
 * thread_policy_set_internal:
 *
 * Apply a scheduling policy flavor to a thread, without the permission
 * and QoS-stripping performed by thread_policy_set().
 *
 * Called with nothing locked; takes the thread mutex (and briefly the
 * thread spinlock at splsched for flavors that touch scheduler state).
 * Deferred effects accumulate in a pend_token and are flushed after the
 * mutex is dropped.
 *
 * Returns KERN_TERMINATED if the thread is no longer active, otherwise
 * KERN_SUCCESS or an error describing the invalid request.
 */
kern_return_t
thread_policy_set_internal(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* With no (or short) payload, this flavor defaults to timeshare. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/*
		 * The thread may be demoted with RT_DISALLOWED but has just
		 * changed its sched mode to TIMESHARE or FIXED. Make sure to
		 * undemote the thread so the new sched mode takes effect.
		 */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		/*
		 * Both flavors share a layout prefix (see static_asserts above),
		 * so the wider struct pointer is used for both.
		 */
		thread_time_constraint_with_priority_policy_t info;

		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		/* Computation must fit within the constraint and the RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * Force computation up to at least half the constraint
		 * (capped at the max RT quantum).
		 */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period = info->period;
		thread->realtime.computation = info->computation;
		thread->realtime.constraint = info->constraint;
		thread->realtime.preemptible = info->preemptible;

		/*
		 * If the thread has a work interval driven policy, the priority
		 * offset has been set by the work interval.
		 */
		if (!thread->requested_policy.thrp_wi_driven) {
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
			} else {
				thread->realtime.priority_offset = 0;
			}
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		/* Re-check RT eligibility/demotion with the new mode. */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		extern boolean_t affinity_sets_enabled;
		thread_affinity_policy_t info;

		if (!affinity_sets_enabled) {
			result = KERN_INVALID_POLICY;
			break;
		}

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only the owning task may background its own threads. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-set counts as internal, other-thread-set as external. */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* tier_importance is a relative priority in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* A relative priority is meaningless without a QoS tier. */
		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	/* Flush deferred effects (SFI re-evaluation, etc.) without the mutex. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
631
632 /*
633 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
634 * Both result in FIXED mode scheduling.
635 */
636 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)637 convert_policy_to_sched_mode(integer_t policy)
638 {
639 switch (policy) {
640 case POLICY_TIMESHARE:
641 return TH_MODE_TIMESHARE;
642 case POLICY_RR:
643 case POLICY_FIFO:
644 return TH_MODE_FIXED;
645 default:
646 panic("unexpected sched policy: %d", policy);
647 return TH_MODE_NONE;
648 }
649 }
650
651 /*
652 * Called either with the thread mutex locked
653 * or from the pthread kext in a 'safe place'.
654 */
/*
 * Apply a user sched mode and an absolute base priority to a thread.
 *
 * Called either with the thread mutex locked
 * or from the pthread kext in a 'safe place'.
 *
 * Refuses (KERN_FAILURE) to move a realtime thread out of realtime;
 * silently succeeds without effect on a policy_reset thread.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t thread,
    sched_mode_t mode,
    integer_t priority,
    task_pend_token_t pend_token)
{
	kern_return_t kr = KERN_SUCCESS;

	spl_t s = splsched();
	thread_lock(thread);

	/* This path isn't allowed to change a thread out of realtime. */
	if ((thread->sched_mode == TH_MODE_REALTIME) ||
	    (thread->saved_mode == TH_MODE_REALTIME)) {
		kr = KERN_FAILURE;
		goto unlock;
	}

	if (thread->policy_reset) {
		kr = KERN_SUCCESS;
		goto unlock;
	}

	sched_mode_t old_mode = thread->sched_mode;

	/*
	 * Reverse engineer and apply the correct importance value
	 * from the requested absolute priority value.
	 *
	 * TODO: Store the absolute priority value instead
	 */

	/* Convert the absolute priority into an importance relative to a base. */
	if (priority >= thread->max_priority) {
		priority = thread->max_priority - thread->task_priority;
	} else if (priority >= MINPRI_KERNEL) {
		priority -= MINPRI_KERNEL;
	} else if (priority >= MINPRI_RESERVED) {
		priority -= MINPRI_RESERVED;
	} else {
		priority -= BASEPRI_DEFAULT;
	}

	/* Re-base onto the task priority and clamp to [MINPRI, max_priority]. */
	priority += thread->task_priority;

	if (priority > thread->max_priority) {
		priority = thread->max_priority;
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	thread->importance = priority - thread->task_priority;

	thread_set_user_sched_mode_and_recompute_pri(thread, mode);

	/* A mode change requires the caller to re-evaluate SFI. */
	if (mode != old_mode) {
		pend_token->tpt_update_thread_sfi = 1;
	}

unlock:
	thread_unlock(thread);
	splx(s);

	return kr;
}
719
/*
 * Freeze the current thread's base priority so later requested-priority
 * changes are deferred until thread_unfreeze_base_pri().
 *
 * Must be called on the current thread, which must not already be frozen.
 */
void
thread_freeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());

	spl_t s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;

	thread_unlock(thread);
	splx(s);
}
734
/*
 * Unfreeze the current thread's base priority and apply any requested
 * base priority change that accumulated while frozen.
 *
 * Returns true if applying the change made preemption pending — i.e. the
 * base-pri change is the most likely cause of an imminent preemption.
 */
bool
thread_unfreeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());
	integer_t base_pri;
	ast_t ast = 0;

	spl_t s = splsched();
	thread_lock(thread);

	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;

	base_pri = thread->req_base_pri;
	if (base_pri != thread->base_pri) {
		/*
		 * This function returns "true" if the base pri change
		 * is the most likely cause for the preemption.
		 */
		sched_set_thread_base_priority(thread, base_pri);
		ast = ast_peek(AST_PREEMPT);
	}

	thread_unlock(thread);
	splx(s);

	return ast != 0;
}
763
/*
 * Return the base priority that workqueue threads use for a given QoS
 * class, straight from the QoS policy table.
 */
uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)
{
	assert(qos < THREAD_QOS_LAST);
	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
}
770
771 thread_qos_t
thread_workq_qos_for_pri(int priority)772 thread_workq_qos_for_pri(int priority)
773 {
774 thread_qos_t qos;
775 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
776 // indicate that workq should map >UI threads to workq's
777 // internal notation for above-UI work.
778 return THREAD_QOS_UNSPECIFIED;
779 }
780 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
781 // map a given priority up to the next nearest qos band.
782 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
783 return qos;
784 }
785 }
786 return THREAD_QOS_MAINTENANCE;
787 }
788
789 /*
790 * private interface for pthread workqueues
791 *
792 * Set scheduling policy & absolute priority for thread
793 * May be called with spinlocks held
794 * Thread mutex lock is not held
795 */
/*
 * private interface for pthread workqueues
 *
 * Reset a workqueue thread's QoS: set the requested QoS/relpriority to
 * the given class and clear any workq QoS override.
 * May be called with spinlocks held
 * Thread mutex lock is not held
 */
void
thread_reset_workq_qos(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
	    &pend_token);

	/* Workq threads have no sockets to rethrottle. */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
819
820 /*
821 * private interface for pthread workqueues
822 *
823 * Set scheduling policy & absolute priority for thread
824 * May be called with spinlocks held
825 * Thread mutex lock is held
826 */
/*
 * private interface for pthread workqueues
 *
 * Apply a workq QoS override to the thread (without touching its
 * requested QoS).
 * May be called with spinlocks held
 * Thread mutex lock is held
 */
void
thread_set_workq_override(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);

	/* Workq threads have no sockets to rethrottle. */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
847
848 /*
849 * private interface for pthread workqueues
850 *
851 * Set scheduling policy & absolute priority for thread
852 * May be called with spinlocks held
853 * Thread mutex lock is not held
854 */
/*
 * private interface for pthread workqueues
 *
 * Set scheduling policy & absolute priority for thread
 * May be called with spinlocks held
 * Thread mutex lock is not held
 *
 * Only valid for static-param workqueue threads; silently returns
 * otherwise (or when the thread is inactive).
 */
void
thread_set_workq_pri(thread_t thread,
    thread_qos_t qos,
    integer_t priority,
    integer_t policy)
{
	struct task_pend_token pend_token = {};
	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	assert(qos < THREAD_QOS_LAST);
	assert(thread->static_param);

	if (!thread->static_param || !thread->active) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/* Set the requested QoS and clear any workq override. */
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
	    0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* Concern: this doesn't hold the mutex... */

	__assert_only kern_return_t kr;
	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
	    &pend_token);
	assert(kr == KERN_SUCCESS);

	if (pend_token.tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}
}
894
895 /*
896 * thread_set_mode_and_absolute_pri:
897 *
898 * Set scheduling policy & absolute priority for thread, for deprecated
899 * thread_set_policy and thread_policy interfaces.
900 *
901 * Called with nothing locked.
902 */
903 kern_return_t
thread_set_mode_and_absolute_pri(thread_t thread,integer_t policy,integer_t priority)904 thread_set_mode_and_absolute_pri(thread_t thread,
905 integer_t policy,
906 integer_t priority)
907 {
908 kern_return_t kr = KERN_SUCCESS;
909 struct task_pend_token pend_token = {};
910
911 sched_mode_t mode = convert_policy_to_sched_mode(policy);
912
913 thread_mtx_lock(thread);
914
915 if (!thread->active) {
916 kr = KERN_TERMINATED;
917 goto unlock;
918 }
919
920 if (thread_is_static_param(thread)) {
921 kr = KERN_POLICY_STATIC;
922 goto unlock;
923 }
924
925 /* Setting legacy policies on threads kills the current QoS */
926 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
927 thread_remove_qos_policy_locked(thread, &pend_token);
928 }
929
930 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
931
932 unlock:
933 thread_mtx_unlock(thread);
934
935 thread_policy_update_complete_unlocked(thread, &pend_token);
936
937 return kr;
938 }
939
940 /*
941 * Set the thread's requested mode and recompute priority
942 * Called with thread mutex and thread locked
943 *
944 * TODO: Mitigate potential problems caused by moving thread to end of runq
945 * whenever its priority is recomputed
946 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
947 */
948 static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread,sched_mode_t mode)949 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
950 {
951 if (thread->policy_reset) {
952 return;
953 }
954
955 boolean_t removed = thread_run_queue_remove(thread);
956
957 sched_set_thread_mode_user(thread, mode);
958
959 thread_recompute_priority(thread);
960
961 if (removed) {
962 thread_run_queue_reinsert(thread, SCHED_TAILQ);
963 }
964 }
965
966 /* called at splsched with thread lock locked */
/*
 * Fold the thread's CPU time since the last snapshot into the owning
 * task's per-QoS counters, bucketed once by effective QoS and once by
 * requested QoS.
 *
 * called at splsched with thread lock locked
 */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate the thread's last context switch or user/kernel
	 * transition (unless precise user/kernel time is disabled).
	 *
	 * TODO: Consider running an update operation here to update it first.
	 * Maybe doable with interrupts disabled from current thread.
	 * If the thread is on a different core, may not be easy to get right.
	 */

	timer_sum = recount_thread_time_mach(thread);
	timer_delta = timer_sum - thread->vtimer_qos_save;

	/* Remember the snapshot so the next call only adds new time. */
	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}
1019
/*
 * called with no thread locks held
 * may hold task lock
 *
 * Snapshot the thread's accumulated CPU time into the per-QoS task
 * counters. Lock ordering here is: thread mutex, then raise spl, then
 * thread spinlock — the locked helper requires both to be held.
 */
void
thread_update_qos_cpu_time(thread_t thread)
{
	thread_mtx_lock(thread);

	spl_t s = splsched();
	thread_lock(thread);

	thread_update_qos_cpu_time_locked(thread);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1039
1040 /*
1041 * Calculate base priority from thread attributes, and set it on the thread
1042 *
1043 * Called with thread_lock and thread mutex held.
1044 */
1045 void
thread_recompute_priority(thread_t thread)1046 thread_recompute_priority(
1047 thread_t thread)
1048 {
1049 integer_t priority;
1050 integer_t adj_priority;
1051
1052 if (thread->policy_reset) {
1053 return;
1054 }
1055
1056 if (thread->sched_mode == TH_MODE_REALTIME) {
1057 uint8_t i = thread->realtime.priority_offset;
1058 assert((i >= 0) && (i < NRTQS));
1059 priority = BASEPRI_RTQUEUES + i;
1060
1061 sched_set_thread_base_priority(thread, priority);
1062 if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1063 /* Make sure the thread has a valid deadline */
1064 uint64_t ctime = mach_absolute_time();
1065 thread->realtime.deadline = thread->realtime.constraint + ctime;
1066 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1067 (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1068 }
1069 return;
1070
1071 /*
1072 * A thread may have joined a RT work interval but then never
1073 * changed its sched mode or have been demoted. RT work
1074 * intervals will have RT priorities - ignore the priority if
1075 * the thread isn't RT.
1076 */
1077 } else if (thread->effective_policy.thep_wi_driven &&
1078 work_interval_get_priority(thread) < BASEPRI_RTQUEUES) {
1079 priority = work_interval_get_priority(thread);
1080 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1081 int qos = thread->effective_policy.thep_qos;
1082 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1083 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1084 int qos_scaled_relprio;
1085
1086 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1087 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1088
1089 priority = thread_qos_policy_params.qos_pri[qos];
1090 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1091
1092 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1093 /* Bump priority 46 to 47 when in a frontmost app */
1094 qos_scaled_relprio += 1;
1095 }
1096
1097 /* TODO: factor in renice priority here? */
1098
1099 priority += qos_scaled_relprio;
1100 } else {
1101 if (thread->importance > MAXPRI) {
1102 priority = MAXPRI;
1103 } else if (thread->importance < -MAXPRI) {
1104 priority = -MAXPRI;
1105 } else {
1106 priority = thread->importance;
1107 }
1108
1109 priority += thread->task_priority;
1110 }
1111
1112 /* Boost the priority of threads which are RT demoted. */
1113 if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
1114 priority = MAX(priority, MAXPRI_USER);
1115 }
1116
1117 priority = MAX(priority, thread->user_promotion_basepri);
1118
1119 /*
1120 * Clamp priority back into the allowed range for this task.
1121 * The initial priority value could be out of this range due to:
1122 * Task clamped to BG or Utility (max-pri is 4, or 20)
1123 * Task is user task (max-pri is 63)
1124 * Task is kernel task (max-pri is 95)
1125 * Note that thread->importance is user-settable to any integer
1126 * via THREAD_PRECEDENCE_POLICY.
1127 */
1128 adj_priority = priority;
1129 adj_priority = MIN(adj_priority, thread->max_priority);
1130 adj_priority = MAX(adj_priority, MINPRI);
1131
1132 /* Allow workload driven priorities to exceed max_priority. */
1133 if (thread->effective_policy.thep_wi_driven) {
1134 adj_priority = MAX(adj_priority, priority);
1135 }
1136
1137 /* Allow priority to exceed max_priority for promotions. */
1138 if (thread->effective_policy.thep_promote_above_task) {
1139 adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
1140 }
1141 priority = adj_priority;
1142 assert3u(priority, <=, MAXPRI);
1143
1144 if (thread->saved_mode == TH_MODE_REALTIME &&
1145 sched_thread_mode_has_demotion(thread, TH_SFLAG_FAILSAFE)) {
1146 priority = DEPRESSPRI;
1147 }
1148
1149 if (thread->effective_policy.thep_terminated == TRUE) {
1150 /*
1151 * We temporarily want to override the expected priority to
1152 * ensure that the thread exits in a timely manner.
1153 * Note that this is allowed to exceed thread->max_priority
1154 * so that the thread is no longer clamped to background
1155 * during the final exit phase.
1156 */
1157 if (priority < thread->task_priority) {
1158 priority = thread->task_priority;
1159 }
1160 if (priority < BASEPRI_DEFAULT) {
1161 priority = BASEPRI_DEFAULT;
1162 }
1163 }
1164
1165 #if !defined(XNU_TARGET_OS_OSX)
1166 /* No one can have a base priority less than MAXPRI_THROTTLE */
1167 if (priority < MAXPRI_THROTTLE) {
1168 priority = MAXPRI_THROTTLE;
1169 }
1170 #endif /* !defined(XNU_TARGET_OS_OSX) */
1171
1172 sched_set_thread_base_priority(thread, priority);
1173 }
1174
/* Called with the task lock held, but not the thread mutex or spinlock */
void
thread_policy_update_tasklocked(
	thread_t thread,
	integer_t priority,
	integer_t max_priority,
	task_pend_token_t pend_token)
{
	/* Push new task-derived priority bounds into the thread and re-derive. */
	thread_mtx_lock(thread);

	if (!thread->active || thread->policy_reset) {
		/* Thread is terminating or already reset; nothing to update. */
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/* old_max_priority is consulted below to detect throttle-boundary crossings. */
	__unused
	integer_t old_max_priority = thread->max_priority;

	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1222
/*
 * Reset thread to default state in preparation for termination
 * Called with thread mutex locked
 *
 * Always called on current thread, so we don't need a run queue remove
 */
void
thread_policy_reset(
	thread_t thread)
{
	spl_t s;

	assert(thread == current_thread());

	s = splsched();
	thread_lock(thread);

	/* Undo any outstanding scheduler-mode demotions before resetting. */
	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
	}

	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	if (thread->sched_flags & TH_SFLAG_RT_DISALLOWED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
	}

	/* At this point, the various demotions should be inactive */
	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));

	/* Reset thread back to task-default basepri and mode */
	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));

	sched_set_thread_mode(thread, newmode);

	thread->importance = 0;

	/* Prevent further changes to thread base priority or mode */
	thread->policy_reset = 1;

	sched_set_thread_base_priority(thread, thread->task_priority);

	thread_unlock(thread);
	splx(s);
}
1271
/*
 * Fetch a thread policy flavor into caller-supplied storage.
 *
 * If *get_default is TRUE on entry (or becomes TRUE because the thread
 * is not in the queried mode), the flavor's default values are returned
 * instead of the thread's current settings.
 *
 * Returns KERN_INVALID_ARGUMENT for a null thread, an unknown flavor,
 * or an undersized count; KERN_TERMINATED if the thread is inactive.
 */
kern_return_t
thread_policy_get(
	thread_t                                thread,
	thread_policy_flavor_t          flavor,
	thread_policy_t                         policy_info,
	mach_msg_type_number_t          *count,
	boolean_t                                       *get_default)
{
	kern_return_t                   result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t               timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/*
			 * Report the thread's (or, if demoted, its saved)
			 * timeshare-ness; RT threads fall back to defaults.
			 */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t        info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/* The WITH_PRIORITY flavor carries one extra field. */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				/* Not an RT thread: fall through to defaults below. */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t              info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t                info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t           info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Raw bit images of the requested/effective policy structs. */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_REQUESTED_STATE_POLICY:
	{
		if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * NOTE(review): this case reads requested_policy while holding
		 * only the thread mutex (no splsched/thread_lock pair like the
		 * cases above) — confirm the mutex is sufficient here.
		 */
		thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
		struct thread_requested_policy *req_policy = &thread->requested_policy;

		info->thrq_base_qos = req_policy->thrp_qos;
		info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
		info->thrq_qos_override = req_policy->thrp_qos_override;
		info->thrq_qos_promote = req_policy->thrp_qos_promote;
		info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
		info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
		info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* Relative priority is stored inverted; undo that for the caller. */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1566
/*
 * Derive the initial effective policy for a newly created thread,
 * bracketed by IMP_UPDATE_TASK_CREATE trace events.
 */
void
thread_policy_create(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	/* We pass a pend token but ignore it */
	struct task_pend_token pend_token = {};

	thread_policy_update_internal_spinlocked(thread, true, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1585
/*
 * Trace-wrapped entry to the effective-policy recalculation.
 * Called with the thread spinlock held (see callers).
 */
static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1601
1602
1603
/*
 * One thread state update function TO RULE THEM ALL
 *
 * This function updates the thread effective policy fields
 * and pushes the results to the relevant subsystems.
 *
 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
 */
static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
    task_pend_token_t pend_token)
{
	/*
	 * Step 1:
	 * Gather requested policy and effective task state
	 */

	const struct thread_requested_policy requested = thread->requested_policy;
	const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;

	/*
	 * Step 2:
	 * Calculate new effective policies from requested policy, task and thread state
	 * Rules:
	 *      Don't change requested, it won't take effect
	 */

	struct thread_effective_policy next = {};

	next.thep_wi_driven = requested.thrp_wi_driven;

	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;

	/* Effective QoS starts at the requested base and is raised by overrides. */
	uint32_t next_qos = requested.thrp_qos;

	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		next_qos = MAX(requested.thrp_qos_override, next_qos);
		next_qos = MAX(requested.thrp_qos_promote, next_qos);
		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
	}

	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
		/*
		 * This thread is turnstile-boosted higher than the adaptive clamp
		 * by a synchronous waiter. Allow that to override the adaptive
		 * clamp temporarily for this thread only.
		 */
		next.thep_promote_above_task = true;
		next_qos = requested.thrp_qos_promote;
	}

	next.thep_qos = next_qos;

	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
		} else {
			next.thep_qos = task_effective.tep_qos_clamp;
		}
		next.thep_wi_driven = 0;
	}

	/*
	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
	 * This allows QoS promotions to work properly even after the process is unclamped.
	 */
	next.thep_qos_promote = next.thep_qos;

	/* The ceiling only applies to threads that are in the QoS world */
	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
	}

	/*
	 * The QoS relative priority is only applicable when the original programmer's
	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
	 * since otherwise it would be lower than unclamped threads. Similarly, in the
	 * presence of boosting, the programmer doesn't know what other actors
	 * are boosting the thread.
	 */
	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
	    (requested.thrp_qos == next.thep_qos) &&
	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos_relprio = requested.thrp_qos_relprio;
	} else {
		next.thep_qos_relprio = 0;
	}

	/* Calculate DARWIN_BG */
	bool wants_darwinbg        = false;
	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */

	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
		wants_darwinbg = true;
	}

	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * darwinbg threads always create bg sockets,
	 * but only some types of darwinbg change the sockets
	 * after they're created
	 */
	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	if (requested.thrp_pidbind_bg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	/* BG and maintenance QoS imply darwinbg even without an explicit request. */
	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
		wants_darwinbg = true;
	}

	/* Calculate side effects of DARWIN_BG */

	if (wants_darwinbg) {
		next.thep_darwinbg = 1;
		next.thep_wi_driven = 0;
	}

	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
		next.thep_new_sockets_bg = 1;
	}

	/* Don't use task_effective.tep_all_sockets_bg here */
	if (wants_all_sockets_bg) {
		next.thep_all_sockets_bg = 1;
	}

	/* darwinbg implies background QOS (or lower) */
	if (next.thep_darwinbg &&
	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos = THREAD_QOS_BACKGROUND;
		next.thep_qos_relprio = 0;
	}

	/* Calculate IO policy */

	int iopol = THROTTLE_LEVEL_TIER0;

	/* Factor in the task's IO policy */
	if (next.thep_darwinbg) {
		iopol = MAX(iopol, task_effective.tep_bg_iotier);
	}

	if (!next.thep_promote_above_task) {
		iopol = MAX(iopol, task_effective.tep_io_tier);
	}

	/* Look up the associated IO tier value for the QoS class */
	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);

	iopol = MAX(iopol, requested.thrp_int_iotier);
	iopol = MAX(iopol, requested.thrp_ext_iotier);

	/* Apply the kevent iotier override */
	iopol = MIN(iopol, requested.thrp_iotier_kevent_override);

	next.thep_io_tier = iopol;

	/*
	 * If a QoS override is causing IO to go into a lower tier, we also set
	 * the passive bit so that a thread doesn't end up stuck in its own throttle
	 * window when the override goes away.
	 */

	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
	int req_qos_iotier  = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);

	/* Calculate Passive IO policy */
	if (requested.thrp_ext_iopassive ||
	    requested.thrp_int_iopassive ||
	    qos_io_override_active ||
	    task_effective.tep_io_passive) {
		next.thep_io_passive = 1;
	}

	/* Calculate timer QOS */
	uint32_t latency_qos = requested.thrp_latency_qos;

	if (!next.thep_promote_above_task) {
		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
	}

	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);

	next.thep_latency_qos = latency_qos;

	/* Calculate throughput QOS */
	uint32_t through_qos = requested.thrp_through_qos;

	if (!next.thep_promote_above_task) {
		through_qos = MAX(through_qos, task_effective.tep_through_qos);
	}

	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);

	next.thep_through_qos = through_qos;

	if (task_effective.tep_terminated || requested.thrp_terminated) {
		/* Shoot down the throttles that slow down exit or response to SIGTERM */
		next.thep_terminated    = 1;
		next.thep_darwinbg      = 0;
		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
		next.thep_wi_driven     = 0;
	}

	/*
	 * Step 3:
	 * Swap out old policy for new policy
	 */

	struct thread_effective_policy prev = thread->effective_policy;

	/* Bill accumulated CPU time against the outgoing QoS before switching. */
	thread_update_qos_cpu_time_locked(thread);

	/* This is the point where the new values become visible to other threads */
	thread->effective_policy = next;

	/*
	 * Step 4:
	 * Pend updates that can't be done while holding the thread lock
	 */

	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
		pend_token->tpt_update_sockets = 1;
	}

	/* TODO: Doesn't this only need to be done if the throttle went up? */
	if (prev.thep_io_tier != next.thep_io_tier) {
		pend_token->tpt_update_throttle = 1;
	}

	/*
	 * Check for the attributes that sfi_thread_classify() consults,
	 * and trigger SFI re-evaluation.
	 */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_darwinbg != next.thep_darwinbg) {
		pend_token->tpt_update_thread_sfi = 1;
	}

	integer_t old_base_pri = thread->base_pri;

	/*
	 * Step 5:
	 * Update other subsystems as necessary if something has changed
	 */

	/* Check for the attributes that thread_recompute_priority() consults */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_qos_relprio != next.thep_qos_relprio ||
	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
	    prev.thep_promote_above_task != next.thep_promote_above_task ||
	    prev.thep_terminated != next.thep_terminated ||
	    prev.thep_wi_driven != next.thep_wi_driven ||
	    pend_token->tpt_force_recompute_pri == 1 ||
	    recompute_priority) {
		thread_recompute_priority(thread);
	}

	/*
	 * Check if the thread is waiting on a turnstile and needs priority propagation.
	 */
	if (pend_token->tpt_update_turnstile &&
	    ((old_base_pri == thread->base_pri) ||
	    !thread_get_waiting_turnstile(thread))) {
		/*
		 * Reset update turnstile pend token since either
		 * the thread priority did not change or thread is
		 * not blocked on a turnstile.
		 */
		pend_token->tpt_update_turnstile = 0;
	}
}
1892
1893
1894 /*
1895 * Initiate a thread policy state transition on a thread with its TID
1896 * Useful if you cannot guarantee the thread won't get terminated
1897 * Precondition: No locks are held
1898 * Will take task lock - using the non-tid variant is faster
1899 * if you already have a thread ref.
1900 */
1901 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1902 proc_set_thread_policy_with_tid(task_t task,
1903 uint64_t tid,
1904 int category,
1905 int flavor,
1906 int value)
1907 {
1908 /* takes task lock, returns ref'ed thread or NULL */
1909 thread_t thread = task_findtid(task, tid);
1910
1911 if (thread == THREAD_NULL) {
1912 return;
1913 }
1914
1915 proc_set_thread_policy(thread, category, flavor, value);
1916
1917 thread_deallocate(thread);
1918 }
1919
/*
 * Initiate a thread policy transition on a thread
 * This path supports networking transitions (i.e. darwinbg transitions)
 * Precondition: No locks are held
 *
 * Thin wrapper: delegates to the _ext variant with value2 = 0.
 */
void
proc_set_thread_policy(thread_t thread,
    int category,
    int flavor,
    int value)
{
	proc_set_thread_policy_ext(thread, category, flavor, value, 0);
}
1933
/*
 * Set a thread policy value (with a secondary value2) and apply all
 * resulting side effects.
 *
 * The requested/effective update runs under the thread mutex; the
 * pended subsystem callbacks run afterwards, unlocked, because they
 * must not be invoked while holding the mutex.
 */
void
proc_set_thread_policy_ext(thread_t thread,
    int category,
    int flavor,
    int value,
    int value2)
{
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);

	proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);

	thread_mtx_unlock(thread);

	/* Must happen after dropping the mutex; consumes the pended work bits. */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
1951
/*
 * Do the things that can't be done while holding a thread mutex.
 * These are set up to call back into thread policy to get the latest value,
 * so they don't have to be synchronized with the update.
 * The only required semantic is 'call this sometime after updating effective policy'
 *
 * Precondition: Thread mutex is not held
 *
 * This may be called with the task lock held, but in that case it won't be
 * called with tpt_update_sockets set.
 */
void
thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
	/* Push the bg state down to the BSD layer's sockets for this proc. */
	if (pend_token->tpt_update_sockets) {
		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
	}
#endif /* MACH_BSD */

	/* Re-evaluate the thread's IO throttle tier. */
	if (pend_token->tpt_update_throttle) {
		rethrottle_thread(get_bsdthread_info(thread));
	}

	/* Re-classify the thread for selective forced idle. */
	if (pend_token->tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}

	/* Propagate a base-priority change through the turnstile chain. */
	if (pend_token->tpt_update_turnstile) {
		turnstile_update_thread_priority_chain(thread);
	}
}
1984
1985 /*
1986 * Set and update thread policy
1987 * Thread mutex might be held
1988 */
static void
proc_set_thread_policy_locked(thread_t thread,
	int category,
	int flavor,
	int value,
	int value2,
	task_pend_token_t pend_token)
{
	/* Raise to scheduler interrupt level and take the thread spinlock */
	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	thread_unlock(thread);
	splx(s);
}
2005
2006 /*
2007 * Set and update thread policy
2008 * Thread spinlock is held
2009 */
static void
proc_set_thread_policy_spinlocked(thread_t thread,
	int category,
	int flavor,
	int value,
	int value2,
	task_pend_token_t pend_token)
{
	/* Trace entry with the pre-update requested-policy words */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	/* Record the new requested value for this flavor */
	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	/* Recompute effective policy; deferred work accumulates in pend_token */
	thread_policy_update_spinlocked(thread, false, pend_token);

	/* Trace exit with the post-update requested-policy words */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
2032
2033 /*
2034 * Set the requested state for a specific flavor to a specific value.
2035 */
static void
thread_set_requested_policy_spinlocked(thread_t thread,
	int category,
	int flavor,
	int value,
	int value2,
	task_pend_token_t pend_token)
{
	int tier, passive;

	/* Work on a local copy; publish the whole struct at the end */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_darwinbg = value;
		} else {
			requested.thrp_int_darwinbg = value;
		}
		/* BG affects priority, so turnstile chains must be rechecked */
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_IOPOL:
		/* Translate the IOPOL_* value into a throttle tier + passive bit */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = tier;
			requested.thrp_ext_iopassive = passive;
		} else {
			requested.thrp_int_iotier = tier;
			requested.thrp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = value;
		} else {
			requested.thrp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iopassive = value;
		} else {
			requested.thrp_int_iopassive = value;
		}
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_pidbind_bg = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_latency_qos = value;
		break;

	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_through_qos = value;
		break;

	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_AND_RELPRIO:
		assert(category == TASK_POLICY_ATTRIBUTE);
		/* value is the QoS class; value2 the relative priority within it */
		requested.thrp_qos = value;
		requested.thrp_qos_relprio = value2;
		pend_token->tpt_update_turnstile = 1;
		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
		break;

	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_workq_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_promote = value;
		break;

	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_kevent_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_wlsvc_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_terminated = value;
		break;

	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_iotier_kevent_override = value;
		break;

	case TASK_POLICY_WI_DRIVEN:
		assert(category == TASK_POLICY_ATTRIBUTE);
		/* Work-interval-driven policy may only be set on the current thread */
		assert(thread == current_thread());

		/* value selects set/clear; value2 carries the scheduler mode */
		const bool set_policy = value;
		const sched_mode_t mode = value2;

		requested.thrp_wi_driven = set_policy ? 1 : 0;

		/*
		 * No sched mode change for REALTIME (threads must explicitly
		 * opt-in), however the priority_offset needs to be updated.
		 */
		if (mode == TH_MODE_REALTIME) {
			const int pri = work_interval_get_priority(thread);
			assert3u(pri, >=, BASEPRI_RTQUEUES);
			thread->realtime.priority_offset = set_policy ?
			    (uint8_t)(pri - BASEPRI_RTQUEUES) : 0;
		} else {
			sched_set_thread_mode_user(thread, mode);
			if (set_policy) {
				thread->static_param = true;
			}
		}
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	/* Publish the updated requested policy as a single struct store */
	thread->requested_policy = requested;
}
2185
2186 /*
2187 * Gets what you set. Effective values may be different.
2188 * Precondition: No locks are held
2189 */
2190 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2191 proc_get_thread_policy(thread_t thread,
2192 int category,
2193 int flavor)
2194 {
2195 int value = 0;
2196 thread_mtx_lock(thread);
2197 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2198 thread_mtx_unlock(thread);
2199 return value;
2200 }
2201
2202 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2203 proc_get_thread_policy_locked(thread_t thread,
2204 int category,
2205 int flavor,
2206 int* value2)
2207 {
2208 int value = 0;
2209
2210 spl_t s = splsched();
2211 thread_lock(thread);
2212
2213 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2214
2215 thread_unlock(thread);
2216 splx(s);
2217
2218 return value;
2219 }
2220
2221 /*
2222 * Gets what you set. Effective values may be different.
2223 */
2224 static int
thread_get_requested_policy_spinlocked(thread_t thread,int category,int flavor,int * value2)2225 thread_get_requested_policy_spinlocked(thread_t thread,
2226 int category,
2227 int flavor,
2228 int* value2)
2229 {
2230 int value = 0;
2231
2232 struct thread_requested_policy requested = thread->requested_policy;
2233
2234 switch (flavor) {
2235 case TASK_POLICY_DARWIN_BG:
2236 if (category == TASK_POLICY_EXTERNAL) {
2237 value = requested.thrp_ext_darwinbg;
2238 } else {
2239 value = requested.thrp_int_darwinbg;
2240 }
2241 break;
2242 case TASK_POLICY_IOPOL:
2243 if (category == TASK_POLICY_EXTERNAL) {
2244 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2245 requested.thrp_ext_iopassive);
2246 } else {
2247 value = proc_tier_to_iopol(requested.thrp_int_iotier,
2248 requested.thrp_int_iopassive);
2249 }
2250 break;
2251 case TASK_POLICY_IO:
2252 if (category == TASK_POLICY_EXTERNAL) {
2253 value = requested.thrp_ext_iotier;
2254 } else {
2255 value = requested.thrp_int_iotier;
2256 }
2257 break;
2258 case TASK_POLICY_PASSIVE_IO:
2259 if (category == TASK_POLICY_EXTERNAL) {
2260 value = requested.thrp_ext_iopassive;
2261 } else {
2262 value = requested.thrp_int_iopassive;
2263 }
2264 break;
2265 case TASK_POLICY_QOS:
2266 assert(category == TASK_POLICY_ATTRIBUTE);
2267 value = requested.thrp_qos;
2268 break;
2269 case TASK_POLICY_QOS_OVERRIDE:
2270 assert(category == TASK_POLICY_ATTRIBUTE);
2271 value = requested.thrp_qos_override;
2272 break;
2273 case TASK_POLICY_LATENCY_QOS:
2274 assert(category == TASK_POLICY_ATTRIBUTE);
2275 value = requested.thrp_latency_qos;
2276 break;
2277 case TASK_POLICY_THROUGH_QOS:
2278 assert(category == TASK_POLICY_ATTRIBUTE);
2279 value = requested.thrp_through_qos;
2280 break;
2281 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2282 assert(category == TASK_POLICY_ATTRIBUTE);
2283 value = requested.thrp_qos_workq_override;
2284 break;
2285 case TASK_POLICY_QOS_AND_RELPRIO:
2286 assert(category == TASK_POLICY_ATTRIBUTE);
2287 assert(value2 != NULL);
2288 value = requested.thrp_qos;
2289 *value2 = requested.thrp_qos_relprio;
2290 break;
2291 case TASK_POLICY_QOS_PROMOTE:
2292 assert(category == TASK_POLICY_ATTRIBUTE);
2293 value = requested.thrp_qos_promote;
2294 break;
2295 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2296 assert(category == TASK_POLICY_ATTRIBUTE);
2297 value = requested.thrp_qos_kevent_override;
2298 break;
2299 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2300 assert(category == TASK_POLICY_ATTRIBUTE);
2301 value = requested.thrp_qos_wlsvc_override;
2302 break;
2303 case TASK_POLICY_TERMINATED:
2304 assert(category == TASK_POLICY_ATTRIBUTE);
2305 value = requested.thrp_terminated;
2306 break;
2307 case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
2308 assert(category == TASK_POLICY_ATTRIBUTE);
2309 value = requested.thrp_iotier_kevent_override;
2310 break;
2311
2312 case TASK_POLICY_WI_DRIVEN:
2313 assert(category == TASK_POLICY_ATTRIBUTE);
2314 value = requested.thrp_wi_driven;
2315 break;
2316
2317 default:
2318 panic("unknown policy_flavor %d", flavor);
2319 break;
2320 }
2321
2322 return value;
2323 }
2324
2325 /*
2326 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2327 *
2328 * NOTE: This accessor does not take the task or thread lock.
2329 * Notifications of state updates need to be externally synchronized with state queries.
2330 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2331 * within the context of a timer interrupt.
2332 *
2333 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2334 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2335 * I don't think that cost is worth not having the right answer.
2336 */
int
proc_get_effective_thread_policy(thread_t thread,
	int flavor)
{
	int value = 0;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This call is used within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * It also informs SFI and originator-bg-state.
		 * Returns 1 for background mode, 0 for normal mode
		 */

		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 */
		value = thread->effective_policy.thep_io_tier;
		/* A numerically lower override tier wins over the effective tier */
		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
			value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 *
		 * If an override is causing IO to go into a lower tier, we also set
		 * the passive bit so that a thread doesn't end up stuck in its own throttle
		 * window when the override goes away.
		 */
		value = thread->effective_policy.thep_io_passive ? 1 : 0;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
			value = 1;
		}
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine whether
		 * it should change the thread's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = (thread->effective_policy.thep_all_sockets_bg ||
		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a latency QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 *
		 * Returns a throughput QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_through_qos;
		break;
	case TASK_POLICY_QOS:
		/*
		 * This is communicated to the performance management layer and SFI.
		 *
		 * Returns a QoS policy tier
		 */
		value = thread->effective_policy.thep_qos;
		break;
	default:
		panic("unknown thread policy flavor %d", flavor);
		break;
	}

	return value;
}
2429
2430
2431 /*
2432 * (integer_t) casts limit the number of bits we can fit here
2433 * this interface is deprecated and replaced by the _EXT struct ?
2434 */
2435 static void
proc_get_thread_policy_bitfield(thread_t thread,thread_policy_state_t info)2436 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2437 {
2438 uint64_t bits = 0;
2439 struct thread_requested_policy requested = thread->requested_policy;
2440
2441 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2442 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2443 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2444 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2445 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2446 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2447
2448 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2449 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2450
2451 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2452
2453 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2454 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2455
2456 info->requested = (integer_t) bits;
2457 bits = 0;
2458
2459 struct thread_effective_policy effective = thread->effective_policy;
2460
2461 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2462
2463 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2464 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2465 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2466 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2467
2468 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2469
2470 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2471 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2472
2473 info->effective = (integer_t)bits;
2474 bits = 0;
2475
2476 info->pending = 0;
2477 }
2478
2479 /*
2480 * Sneakily trace either the task and thread requested
2481 * or just the thread requested, depending on if we have enough room.
2482 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2483 *
2484 * LP32 LP64
2485 * threquested_0(thread) thread[0] task[0]
2486 * threquested_1(thread) thread[1] thread[0]
2487 *
2488 */
2489
2490 uintptr_t
threquested_0(thread_t thread)2491 threquested_0(thread_t thread)
2492 {
2493 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2494
2495 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2496
2497 return raw[0];
2498 }
2499
uintptr_t
threquested_1(thread_t thread)
{
#if defined __LP64__
	/*
	 * On LP64 the full 64-bit thread word already fit in threquested_0,
	 * so slot 1 carries the owning task's requested policy instead.
	 */
	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
#else
	/* On LP32, return the upper half of the thread's 64-bit policy word */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
	return raw[1];
#endif
}
2510
2511 uintptr_t
theffective_0(thread_t thread)2512 theffective_0(thread_t thread)
2513 {
2514 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2515
2516 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2517 return raw[0];
2518 }
2519
uintptr_t
theffective_1(thread_t thread)
{
#if defined __LP64__
	/*
	 * On LP64 the full 64-bit thread word already fit in theffective_0,
	 * so slot 1 carries the owning task's effective policy instead.
	 */
	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
#else
	/* On LP32, return the upper half of the thread's 64-bit policy word */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[1];
#endif
}
2530
2531
2532 /*
2533 * Set an override on the thread which is consulted with a
2534 * higher priority than the task/thread policy. This should
2535 * only be set for temporary grants until the thread
2536 * returns to the userspace boundary
2537 *
2538 * We use atomic operations to swap in the override, with
2539 * the assumption that the thread itself can
2540 * read the override and clear it on return to userspace.
2541 *
2542 * No locking is performed, since it is acceptable to see
2543 * a stale override for one loop through throttle_lowpri_io().
2544 * However a thread reference must be held on the thread.
2545 */
2546
void
set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		if (current_override != THROTTLE_LEVEL_NONE) {
			/* Numerically lower tier is more aggressive; keep the winner */
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}
		/* Retry if another CPU changed the override since we read it */
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2573
2574 /*
2575 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2576 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2577 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2578 * priority thread. In these cases, we attempt to propagate the priority token, as long
2579 * as the subsystem informs us of the relationships between the threads. The userspace
2580 * synchronization subsystem should maintain the information of owner->resource and
2581 * resource->waiters itself.
2582 */
2583
2584 /*
2585 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2586 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2587 * to be handled specially in the future, but for now it's fine to slam
2588 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2589 */
2590 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2591 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2592 {
2593 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2594 /* Map all input resource/type to a single one */
2595 *resource = USER_ADDR_NULL;
2596 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2597 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2598 /* no transform */
2599 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2600 /* Map all mutex overrides to a single one, to avoid memory overhead */
2601 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2602 *resource = USER_ADDR_NULL;
2603 }
2604 }
2605 }
2606
2607 /* This helper routine finds an existing override if known. Locking should be done by caller */
2608 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2609 find_qos_override(thread_t thread,
2610 user_addr_t resource,
2611 int resource_type)
2612 {
2613 struct thread_qos_override *override;
2614
2615 override = thread->overrides;
2616 while (override) {
2617 if (override->override_resource == resource &&
2618 override->override_resource_type == resource_type) {
2619 return override;
2620 }
2621
2622 override = override->override_next;
2623 }
2624
2625 return NULL;
2626 }
2627
/*
 * Decrement (or, if 'reset', zero) the contention count of every override
 * matching resource/resource_type; the WILDCARD values match all entries.
 * Entries whose count reaches zero are unlinked and chained onto
 * *free_override_list so the caller can zfree them after dropping locks.
 * Caller provides locking (thread mutex).
 */
static void
find_and_decrement_qos_override(thread_t thread,
	user_addr_t resource,
	int resource_type,
	boolean_t reset,
	struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		/* Capture next first: 'override' may be unlinked below */
		struct thread_qos_override *override_next = override->override_next;

		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* Unlink from the singly-linked override list */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			/* A non-wildcard resource matches at most one entry: done */
			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2674
2675 /* This helper recalculates the current requested override using the policy selected at boot */
2676 static int
calculate_requested_qos_override(thread_t thread)2677 calculate_requested_qos_override(thread_t thread)
2678 {
2679 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2680 return THREAD_QOS_UNSPECIFIED;
2681 }
2682
2683 /* iterate over all overrides and calculate MAX */
2684 struct thread_qos_override *override;
2685 int qos_override = THREAD_QOS_UNSPECIFIED;
2686
2687 override = thread->overrides;
2688 while (override) {
2689 qos_override = MAX(qos_override, override->override_qos);
2690 override = override->override_next;
2691 }
2692
2693 return qos_override;
2694 }
2695
2696 /*
2697 * Returns:
2698 * - 0 on success
2699 * - EINVAL if some invalid input was passed
2700 */
static int
proc_thread_qos_add_override_internal(thread_t thread,
	int override_qos,
	boolean_t first_override_for_resource,
	user_addr_t resource,
	int resource_type)
{
	struct task_pend_token pend_token = {};
	/* NOTE(review): rc is never set to a failure value in this visible path,
	 * despite the EINVAL mention in the header comment — verify contract. */
	int rc = 0;

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos,
	    int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread lock and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread lock was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		/* Link the freshly allocated entry at the head of the list */
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = (int16_t)resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	if (override) {
		/* For a given resource, the override QoS only ratchets upward */
		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
			override->override_qos = (int16_t)override_qos;
		} else {
			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
		}
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_OVERRIDE,
		    new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* We allocated but lost the race; free the unused entry */
	if (override_new) {
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	    new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
2793
2794 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2795 proc_thread_qos_add_override(task_t task,
2796 thread_t thread,
2797 uint64_t tid,
2798 int override_qos,
2799 boolean_t first_override_for_resource,
2800 user_addr_t resource,
2801 int resource_type)
2802 {
2803 boolean_t has_thread_reference = FALSE;
2804 int rc = 0;
2805
2806 if (thread == THREAD_NULL) {
2807 thread = task_findtid(task, tid);
2808 /* returns referenced thread */
2809
2810 if (thread == THREAD_NULL) {
2811 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2812 tid, 0, 0xdead, 0, 0);
2813 return ESRCH;
2814 }
2815 has_thread_reference = TRUE;
2816 } else {
2817 assert(get_threadtask(thread) == task);
2818 }
2819 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2820 first_override_for_resource, resource, resource_type);
2821 if (has_thread_reference) {
2822 thread_deallocate(thread);
2823 }
2824
2825 return rc;
2826 }
2827
/*
 * Remove (or, if 'reset', clear) overrides matching resource/resource_type,
 * then recompute and apply the thread's requested QoS override.
 * Takes the thread mutex, and nests the thread spinlock for the policy
 * get/set; freed entries are zfreed only after all locks are dropped.
 */
static void
proc_thread_qos_remove_override_internal(thread_t thread,
	user_addr_t resource,
	int resource_type,
	boolean_t reset)
{
	struct task_pend_token pend_token = {};

	/* Entries unlinked under the mutex, freed after unlock */
	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Now that no locks are held, release the unlinked override entries */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2891
2892 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2893 proc_thread_qos_remove_override(task_t task,
2894 thread_t thread,
2895 uint64_t tid,
2896 user_addr_t resource,
2897 int resource_type)
2898 {
2899 boolean_t has_thread_reference = FALSE;
2900
2901 if (thread == THREAD_NULL) {
2902 thread = task_findtid(task, tid);
2903 /* returns referenced thread */
2904
2905 if (thread == THREAD_NULL) {
2906 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2907 tid, 0, 0xdead, 0, 0);
2908 return ESRCH;
2909 }
2910 has_thread_reference = TRUE;
2911 } else {
2912 assert(task == get_threadtask(thread));
2913 }
2914
2915 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2916
2917 if (has_thread_reference) {
2918 thread_deallocate(thread);
2919 }
2920
2921 return 0;
2922 }
2923
2924 /* Deallocate before thread termination */
2925 void
proc_thread_qos_deallocate(thread_t thread)2926 proc_thread_qos_deallocate(thread_t thread)
2927 {
2928 /* This thread must have no more IPC overrides. */
2929 assert(thread->kevent_overrides == 0);
2930 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2931 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2932
2933 /*
2934 * Clear out any lingering override objects.
2935 */
2936 struct thread_qos_override *override;
2937
2938 thread_mtx_lock(thread);
2939 override = thread->overrides;
2940 thread->overrides = NULL;
2941 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2942 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2943 thread_mtx_unlock(thread);
2944
2945 while (override) {
2946 struct thread_qos_override *override_next = override->override_next;
2947
2948 zfree(thread_qos_override_zone, override);
2949 override = override_next;
2950 }
2951 }
2952
2953 /*
2954 * Set up the primordial thread's QoS
2955 */
2956 void
task_set_main_thread_qos(task_t task,thread_t thread)2957 task_set_main_thread_qos(task_t task, thread_t thread)
2958 {
2959 struct task_pend_token pend_token = {};
2960
2961 assert(get_threadtask(thread) == task);
2962
2963 thread_mtx_lock(thread);
2964
2965 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2966 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2967 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2968 thread->requested_policy.thrp_qos, 0);
2969
2970 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2971
2972 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2973 primordial_qos, 0, &pend_token);
2974
2975 thread_mtx_unlock(thread);
2976
2977 thread_policy_update_complete_unlocked(thread, &pend_token);
2978
2979 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2980 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2981 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2982 primordial_qos, 0);
2983 }
2984
2985 /*
2986 * KPI for pthread kext
2987 *
2988 * Return a good guess at what the initial manager QoS will be
2989 * Dispatch can override this in userspace if it so chooses
2990 */
2991 thread_qos_t
task_get_default_manager_qos(task_t task)2992 task_get_default_manager_qos(task_t task)
2993 {
2994 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2995
2996 if (primordial_qos == THREAD_QOS_LEGACY) {
2997 primordial_qos = THREAD_QOS_USER_INITIATED;
2998 }
2999
3000 return primordial_qos;
3001 }
3002
3003 /*
3004 * Check if the kernel promotion on thread has changed
3005 * and apply it.
3006 *
3007 * thread locked on entry and exit
3008 */
3009 boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)3010 thread_recompute_kernel_promotion_locked(thread_t thread)
3011 {
3012 boolean_t needs_update = FALSE;
3013 uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
3014
3015 /*
3016 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
3017 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
3018 * and propagates the priority through the chain with the same cap, because as of now it does
3019 * not differenciate on the kernel primitive.
3020 *
3021 * If this assumption will change with the adoption of a kernel primitive that does not
3022 * cap the when adding/propagating,
3023 * then here is the place to put the generic cap for all kernel primitives
3024 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
3025 */
3026 assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
3027
3028 if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
3029 KDBG(MACHDBG_CODE(
3030 DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
3031 thread_tid(thread),
3032 kern_promotion_schedpri,
3033 thread->kern_promotion_schedpri);
3034
3035 needs_update = TRUE;
3036 thread->kern_promotion_schedpri = kern_promotion_schedpri;
3037 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
3038 }
3039
3040 return needs_update;
3041 }
3042
3043 /*
3044 * Check if the user promotion on thread has changed
3045 * and apply it.
3046 *
3047 * thread locked on entry, might drop the thread lock
3048 * and reacquire it.
3049 */
3050 boolean_t
thread_recompute_user_promotion_locked(thread_t thread)3051 thread_recompute_user_promotion_locked(thread_t thread)
3052 {
3053 boolean_t needs_update = FALSE;
3054 struct task_pend_token pend_token = {};
3055 uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
3056 int old_base_pri = thread->base_pri;
3057 thread_qos_t qos_promotion;
3058
3059 /* Check if user promotion has changed */
3060 if (thread->user_promotion_basepri == user_promotion_basepri) {
3061 return needs_update;
3062 } else {
3063 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3064 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
3065 thread_tid(thread),
3066 user_promotion_basepri,
3067 thread->user_promotion_basepri,
3068 0, 0);
3069 KDBG(MACHDBG_CODE(
3070 DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
3071 thread_tid(thread),
3072 user_promotion_basepri,
3073 thread->user_promotion_basepri);
3074 }
3075
3076 /* Update the user promotion base pri */
3077 thread->user_promotion_basepri = user_promotion_basepri;
3078 pend_token.tpt_force_recompute_pri = 1;
3079
3080 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
3081 qos_promotion = THREAD_QOS_UNSPECIFIED;
3082 } else {
3083 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
3084 }
3085
3086 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3087 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
3088
3089 if (thread_get_waiting_turnstile(thread) &&
3090 thread->base_pri != old_base_pri) {
3091 needs_update = TRUE;
3092 }
3093
3094 thread_unlock(thread);
3095
3096 thread_policy_update_complete_unlocked(thread, &pend_token);
3097
3098 thread_lock(thread);
3099
3100 return needs_update;
3101 }
3102
3103 /*
3104 * Convert the thread user promotion base pri to qos for threads in qos world.
3105 * For priority above UI qos, the qos would be set to UI.
3106 */
3107 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)3108 thread_user_promotion_qos_for_pri(int priority)
3109 {
3110 thread_qos_t qos;
3111 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3112 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3113 return qos;
3114 }
3115 }
3116 return THREAD_QOS_MAINTENANCE;
3117 }
3118
3119 /*
3120 * Set the thread's QoS Kevent override
3121 * Owned by the Kevent subsystem
3122 *
3123 * May be called with spinlocks held, but not spinlocks
3124 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3125 *
3126 * One 'add' must be balanced by one 'drop'.
3127 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3128 * Before the thread is deallocated, there must be 0 remaining overrides.
3129 */
3130 static void
thread_kevent_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3131 thread_kevent_override(thread_t thread,
3132 uint32_t qos_override,
3133 boolean_t is_new_override)
3134 {
3135 struct task_pend_token pend_token = {};
3136 boolean_t needs_update;
3137
3138 spl_t s = splsched();
3139 thread_lock(thread);
3140
3141 uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
3142
3143 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3144 assert(qos_override < THREAD_QOS_LAST);
3145
3146 if (is_new_override) {
3147 if (thread->kevent_overrides++ == 0) {
3148 /* This add is the first override for this thread */
3149 assert(old_override == THREAD_QOS_UNSPECIFIED);
3150 } else {
3151 /* There are already other overrides in effect for this thread */
3152 assert(old_override > THREAD_QOS_UNSPECIFIED);
3153 }
3154 } else {
3155 /* There must be at least one override (the previous add call) in effect */
3156 assert(thread->kevent_overrides > 0);
3157 assert(old_override > THREAD_QOS_UNSPECIFIED);
3158 }
3159
3160 /*
3161 * We can't allow lowering if there are several IPC overrides because
3162 * the caller can't possibly know the whole truth
3163 */
3164 if (thread->kevent_overrides == 1) {
3165 needs_update = qos_override != old_override;
3166 } else {
3167 needs_update = qos_override > old_override;
3168 }
3169
3170 if (needs_update) {
3171 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3172 TASK_POLICY_QOS_KEVENT_OVERRIDE,
3173 qos_override, 0, &pend_token);
3174 assert(pend_token.tpt_update_sockets == 0);
3175 }
3176
3177 thread_unlock(thread);
3178 splx(s);
3179
3180 thread_policy_update_complete_unlocked(thread, &pend_token);
3181 }
3182
/* Take a new kevent QoS override on the thread (increments the override count) */
void
thread_add_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, TRUE);
}
3188
/* Adjust an existing kevent QoS override's value (override count unchanged) */
void
thread_update_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, FALSE);
}
3194
3195 void
thread_drop_kevent_override(thread_t thread)3196 thread_drop_kevent_override(thread_t thread)
3197 {
3198 struct task_pend_token pend_token = {};
3199
3200 spl_t s = splsched();
3201 thread_lock(thread);
3202
3203 assert(thread->kevent_overrides > 0);
3204
3205 if (--thread->kevent_overrides == 0) {
3206 /*
3207 * There are no more overrides for this thread, so we should
3208 * clear out the saturated override value
3209 */
3210
3211 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3212 TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3213 0, &pend_token);
3214 }
3215
3216 thread_unlock(thread);
3217 splx(s);
3218
3219 thread_policy_update_complete_unlocked(thread, &pend_token);
3220 }
3221
3222 /*
3223 * Set the thread's QoS Workloop Servicer override
3224 * Owned by the Kevent subsystem
3225 *
3226 * May be called with spinlocks held, but not spinlocks
3227 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3228 *
3229 * One 'add' must be balanced by one 'drop'.
3230 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3231 * Before the thread is deallocated, there must be 0 remaining overrides.
3232 */
3233 static void
thread_servicer_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3234 thread_servicer_override(thread_t thread,
3235 uint32_t qos_override,
3236 boolean_t is_new_override)
3237 {
3238 struct task_pend_token pend_token = {};
3239
3240 spl_t s = splsched();
3241 thread_lock(thread);
3242
3243 if (is_new_override) {
3244 assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3245 } else {
3246 assert(thread->requested_policy.thrp_qos_wlsvc_override);
3247 }
3248
3249 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3250 TASK_POLICY_QOS_SERVICER_OVERRIDE,
3251 qos_override, 0, &pend_token);
3252
3253 thread_unlock(thread);
3254 splx(s);
3255
3256 assert(pend_token.tpt_update_sockets == 0);
3257 thread_policy_update_complete_unlocked(thread, &pend_token);
3258 }
3259
/* Establish a workloop servicer override on the thread */
void
thread_add_servicer_override(thread_t thread, uint32_t qos_override)
{
	/* A servicer override must carry a real QoS value */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, TRUE);
}
3268
/* Change the QoS of an existing workloop servicer override */
void
thread_update_servicer_override(thread_t thread, uint32_t qos_override)
{
	/* An update must carry a real QoS value; dropping uses a separate entry point */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, FALSE);
}
3277
/* Remove the workloop servicer override by clearing it to UNSPECIFIED */
void
thread_drop_servicer_override(thread_t thread)
{
	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
}
3283
/* Set or update the kevent servicer I/O tier override on the thread */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/*
	 * Check if the update is needed.
	 * NOTE(review): this read happens before the thread lock is taken, so
	 * it can race with a concurrent update — presumably benign because a
	 * workloop has a single servicer driving this; confirm with callers.
	 */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* Thread-scoped I/O tier changes never pend socket updates */
	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3311
3312 /* Get current requested qos / relpri, may be called from spinlock context */
3313 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3314 thread_get_requested_qos(thread_t thread, int *relpri)
3315 {
3316 int relprio_value = 0;
3317 thread_qos_t qos;
3318
3319 qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3320 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3321 if (relpri) {
3322 *relpri = -relprio_value;
3323 }
3324 return qos;
3325 }
3326
3327 /*
3328 * This function will promote the thread priority
3329 * since exec could block other threads calling
3330 * proc_find on the proc. This boost must be removed
3331 * via call to thread_clear_exec_promotion.
3332 *
3333 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3334 */
3335 void
thread_set_exec_promotion(thread_t thread)3336 thread_set_exec_promotion(thread_t thread)
3337 {
3338 spl_t s = splsched();
3339 thread_lock(thread);
3340
3341 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3342
3343 thread_unlock(thread);
3344 splx(s);
3345 }
3346
3347 /*
3348 * This function will clear the exec thread
3349 * promotion set on the thread by thread_set_exec_promotion.
3350 */
3351 void
thread_clear_exec_promotion(thread_t thread)3352 thread_clear_exec_promotion(thread_t thread)
3353 {
3354 spl_t s = splsched();
3355 thread_lock(thread);
3356
3357 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3358
3359 thread_unlock(thread);
3360 splx(s);
3361 }
3362
3363 #if CONFIG_SCHED_RT_ALLOW
3364
3365 /*
3366 * flag set by -rt-allow-policy-enable boot-arg to restrict use of
3367 * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
3368 * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
3369 */
3370 static TUNABLE(
3371 bool,
3372 rt_allow_policy_enabled,
3373 "-rt-allow_policy-enable",
3374 false
3375 );
3376
3377 /*
3378 * When the RT allow policy is enabled and a thread allowed to become RT,
3379 * sometimes (if the processes RT allow policy is restricted) the thread will
3380 * have a CPU limit enforced. The following two tunables determine the
3381 * parameters for that CPU limit.
3382 */
3383
3384 /* % of the interval allowed to run. */
3385 TUNABLE_DEV_WRITEABLE(uint8_t, rt_allow_limit_percent,
3386 "rt_allow_limit_percent", 70);
3387
3388 /* The length of interval in nanoseconds. */
3389 TUNABLE_DEV_WRITEABLE(uint16_t, rt_allow_limit_interval_ms,
3390 "rt_allow_limit_interval", 10);
3391
3392 static bool
thread_has_rt(thread_t thread)3393 thread_has_rt(thread_t thread)
3394 {
3395 return
3396 thread->sched_mode == TH_MODE_REALTIME ||
3397 thread->saved_mode == TH_MODE_REALTIME;
3398 }
3399
3400 /*
3401 * Set a CPU limit on a thread based on the RT allow policy. This will be picked
3402 * up by the target thread via the ledger AST.
3403 */
3404 static void
thread_rt_set_cpulimit(thread_t thread)3405 thread_rt_set_cpulimit(thread_t thread)
3406 {
3407 /* Force reasonable values for the cpu limit. */
3408 const uint8_t percent = MAX(MIN(rt_allow_limit_percent, 99), 1);
3409 const uint16_t interval_ms = MAX(rt_allow_limit_interval_ms, 1);
3410
3411 thread->t_ledger_req_percentage = percent;
3412 thread->t_ledger_req_interval_ms = interval_ms;
3413 thread->t_ledger_req_action = THREAD_CPULIMIT_BLOCK;
3414
3415 thread->sched_flags |= TH_SFLAG_RT_CPULIMIT;
3416 }
3417
/* Similar to the above but removes any CPU limit. */
static void
thread_rt_clear_cpulimit(thread_t thread)
{
	thread->sched_flags &= ~TH_SFLAG_RT_CPULIMIT;

	/* Zeroed parameters plus DISABLE tell the ledger AST to drop the limit */
	thread->t_ledger_req_percentage = 0;
	thread->t_ledger_req_interval_ms = 0;
	thread->t_ledger_req_action = THREAD_CPULIMIT_DISABLE;
}
3428
3429 /*
3430 * Evaluate RT policy for a thread, demoting and undemoting as needed.
3431 */
3432 void
thread_rt_evaluate(thread_t thread)3433 thread_rt_evaluate(thread_t thread)
3434 {
3435 task_t task = get_threadtask(thread);
3436 bool platform_binary = false;
3437
3438 /* If the RT allow policy is not enabled - nothing to do. */
3439 if (!rt_allow_policy_enabled) {
3440 return;
3441 }
3442
3443 /* User threads only. */
3444 if (task == kernel_task) {
3445 return;
3446 }
3447
3448 /* Check for platform binary. */
3449 platform_binary = (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
3450
3451 spl_t s = splsched();
3452 thread_lock(thread);
3453
3454 const thread_work_interval_flags_t wi_flags =
3455 os_atomic_load(&thread->th_work_interval_flags, relaxed);
3456
3457 /*
3458 * RT threads which are not joined to a work interval which allows RT
3459 * threads are demoted. Once those conditions no longer hold, the thread
3460 * undemoted.
3461 */
3462 if (thread_has_rt(thread) && (wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) {
3463 if (!sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3464 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_DISALLOWED_WORK_INTERVAL),
3465 thread_tid(thread));
3466 sched_thread_mode_demote(thread, TH_SFLAG_RT_DISALLOWED);
3467 }
3468 } else {
3469 if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3470 sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
3471 }
3472 }
3473
3474 /*
3475 * RT threads get a CPU limit unless they're part of a platform binary
3476 * task. If the thread is no longer RT, any existing CPU limit should be
3477 * removed.
3478 */
3479 bool set_ast = false;
3480 if (!platform_binary &&
3481 thread_has_rt(thread) &&
3482 (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) == 0) {
3483 thread_rt_set_cpulimit(thread);
3484 set_ast = true;
3485 }
3486
3487 if (!platform_binary &&
3488 !thread_has_rt(thread) &&
3489 (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) != 0) {
3490 thread_rt_clear_cpulimit(thread);
3491 set_ast = true;
3492 }
3493
3494 thread_unlock(thread);
3495 splx(s);
3496
3497 if (set_ast) {
3498 /* Ensure the target thread picks up any CPU limit change. */
3499 act_set_astledger(thread);
3500 }
3501 }
3502
3503 #else
3504
/* RT allow policy not configured: evaluation is a no-op */
void
thread_rt_evaluate(__unused thread_t thread)
{
}
3509
3510 #endif /* CONFIG_SCHED_RT_ALLOW */
3511