1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <libkern/OSAtomic.h>
30 #include <mach/mach_types.h>
31 #include <mach/thread_act_server.h>
32
33 #include <kern/kern_types.h>
34 #include <kern/processor.h>
35 #include <kern/thread.h>
36 #include <kern/affinity.h>
37 #include <kern/work_interval.h>
38 #include <mach/task_policy.h>
39 #include <kern/sfi.h>
40 #include <kern/policy_internal.h>
41 #include <sys/errno.h>
42 #include <sys/ulock.h>
43
44 #include <mach/machine/sdt.h>
45
46 static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
47 struct thread_qos_override, KT_DEFAULT);
48
49 #ifdef MACH_BSD
50 extern int proc_selfpid(void);
51 extern char * proc_name_address(void *p);
52 extern void rethrottle_thread(void * uthread);
53 #endif /* MACH_BSD */
54
55 #define QOS_EXTRACT(q) ((q) & 0xff)
56
57 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
58 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
59 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
60 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
61
62 TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
63 QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
64
65 static void
66 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
67
/*
 * Default value for a thread's kevent IO-tier override.
 * NOTE(review): THROTTLE_LEVEL_END presumably encodes "no override in
 * effect" — confirm against the throttle-level definitions.
 */
const int thread_default_iotier_override = THROTTLE_LEVEL_END;

/* Template requested-policy: all fields zero except the kevent IO-tier override. */
const struct thread_requested_policy default_thread_requested_policy = {
	.thrp_iotier_kevent_override = thread_default_iotier_override
};
73
74 /*
75 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
76 * to threads that don't have a QoS class set.
77 */
78 const qos_policy_params_t thread_qos_policy_params = {
79 /*
80 * This table defines the starting base priority of the thread,
81 * which will be modified by the thread importance and the task max priority
82 * before being applied.
83 */
84 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
85 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
86 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
87 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
88 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
89 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
90 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
91
92 /*
93 * This table defines the highest IO priority that a thread marked with this
94 * QoS class can have.
95 */
96 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
97 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
98 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
99 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
100 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
101 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
102 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
103
104 /*
105 * This table defines the highest QoS level that
106 * a thread marked with this QoS class can have.
107 */
108
109 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
110 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
111 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
112 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
113 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
114 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
115 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
116
117 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
118 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
119 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
120 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
121 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
122 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
123 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
124 };
125
126 static void
127 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
128
129 static int
130 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
131
132 static void
133 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
134
135 static void
136 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
137
138 static void
139 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
140
141 static void
142 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
143
144 static int
145 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
146
147 static int
148 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
149
150 static void
151 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
152
153 static void
154 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
155
156 boolean_t
thread_has_qos_policy(thread_t thread)157 thread_has_qos_policy(thread_t thread)
158 {
159 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
160 }
161
162
163 static void
thread_remove_qos_policy_locked(thread_t thread,task_pend_token_t pend_token)164 thread_remove_qos_policy_locked(thread_t thread,
165 task_pend_token_t pend_token)
166 {
167 __unused int prev_qos = thread->requested_policy.thrp_qos;
168
169 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
170
171 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
172 THREAD_QOS_UNSPECIFIED, 0, pend_token);
173 }
174
175 kern_return_t
thread_remove_qos_policy(thread_t thread)176 thread_remove_qos_policy(thread_t thread)
177 {
178 struct task_pend_token pend_token = {};
179
180 thread_mtx_lock(thread);
181 if (!thread->active) {
182 thread_mtx_unlock(thread);
183 return KERN_TERMINATED;
184 }
185
186 thread_remove_qos_policy_locked(thread, &pend_token);
187
188 thread_mtx_unlock(thread);
189
190 thread_policy_update_complete_unlocked(thread, &pend_token);
191
192 return KERN_SUCCESS;
193 }
194
195
196 boolean_t
thread_is_static_param(thread_t thread)197 thread_is_static_param(thread_t thread)
198 {
199 if (thread->static_param) {
200 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
201 return TRUE;
202 }
203 return FALSE;
204 }
205
206 /*
207 * Relative priorities can range between 0REL and -15REL. These
208 * map to QoS-specific ranges, to create non-overlapping priority
209 * ranges.
210 */
211 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)212 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
213 {
214 int next_lower_qos;
215
216 /* Fast path, since no validation or scaling is needed */
217 if (qos_relprio == 0) {
218 return 0;
219 }
220
221 switch (qos) {
222 case THREAD_QOS_USER_INTERACTIVE:
223 next_lower_qos = THREAD_QOS_USER_INITIATED;
224 break;
225 case THREAD_QOS_USER_INITIATED:
226 next_lower_qos = THREAD_QOS_LEGACY;
227 break;
228 case THREAD_QOS_LEGACY:
229 next_lower_qos = THREAD_QOS_UTILITY;
230 break;
231 case THREAD_QOS_UTILITY:
232 next_lower_qos = THREAD_QOS_BACKGROUND;
233 break;
234 case THREAD_QOS_MAINTENANCE:
235 case THREAD_QOS_BACKGROUND:
236 next_lower_qos = 0;
237 break;
238 default:
239 panic("Unrecognized QoS %d", qos);
240 return 0;
241 }
242
243 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
244 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
245
246 /*
247 * We now have the valid range that the scaled relative priority can map to. Note
248 * that the lower bound is exclusive, but the upper bound is inclusive. If the
249 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
250 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
251 * remainder.
252 */
253 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
254
255 return scaled_relprio;
256 }
257
258 /*
259 * flag set by -qos-policy-allow boot-arg to allow
260 * testing thread qos policy from userspace
261 */
262 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
263
/*
 * MIG-visible entry point for setting a thread policy flavor.
 *
 * Performs the permission/validation checks that do not apply to the
 * in-kernel path, saves and (on failure) restores the thread's QoS
 * tier, then delegates to thread_policy_set_internal().
 */
kern_return_t
thread_policy_set(
	thread_t thread,
	thread_policy_flavor_t flavor,
	thread_policy_t policy_info,
	mach_msg_type_number_t count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	/* UNSPECIFIED here means "no saved tier to restore on failure". */
	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/* allow_qos_policy_set (boot-arg) bypasses these userspace restrictions. */
	if (!allow_qos_policy_set) {
		/* Threads marked static_param refuse all policy changes. */
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		/* Direct THREAD_QOS_POLICY sets are rejected on this path. */
		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			/* Only the base realtime priority is accepted from userspace. */
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);
		if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
			/* Fail requests to become realtime for threads having joined workintervals
			 * with workload ID that don't have the rt-allowed flag. */
			return KERN_INVALID_POLICY;
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
334
335 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
336 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
337 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
338 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
339
/*
 * Apply a scheduling/QoS policy flavor directly to a thread.
 *
 * In-kernel entry point: performs no permission checks beyond
 * thread->active. Takes the thread mutex for the whole operation
 * (plus the thread spinlock at splsched where scheduler state is
 * touched) and flushes deferred work via the pend token on exit.
 *
 * Returns KERN_TERMINATED for inactive threads, KERN_INVALID_ARGUMENT
 * for malformed requests, otherwise the per-flavor result.
 */
kern_return_t
thread_policy_set_internal(
	thread_t thread,
	thread_policy_flavor_t flavor,
	thread_policy_t policy_info,
	mach_msg_type_number_t count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* Missing/short info defaults to timeshare scheduling. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/*
		 * The thread may be demoted with RT_DISALLOWED but has just
		 * changed its sched mode to TIMESHARE or FIXED. Make sure to
		 * undemote the thread so the new sched mode takes effect.
		 */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/* The WITH_PRIORITY flavor carries one extra field; both share a prefix. */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;


		/* Computation must fit within the constraint and the RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * Raise computation to at least half the constraint
		 * (capped at max_rt_quantum). NOTE: this mutates the
		 * caller-supplied policy_info in place.
		 */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			/* Requested RT priority must lie in [BASEPRI_RTQUEUES, MAXPRI]. */
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period = info->period;
		thread->realtime.computation = info->computation;
		thread->realtime.constraint = info->constraint;
		thread->realtime.preemptible = info->preemptible;

		/*
		 * If the thread has a work interval driven policy, the priority
		 * offset has been set by the work interval.
		 */
		if (!thread->requested_policy.thrp_wi_driven) {
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
			} else {
				thread->realtime.priority_offset = 0;
			}
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		/* Re-evaluate RT eligibility now that the mode changed. */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		/* Importance feeds directly into base-priority recomputation. */
		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		extern boolean_t affinity_sets_enabled;
		thread_affinity_policy_t info;

		if (!affinity_sets_enabled) {
			result = KERN_INVALID_POLICY;
			break;
		}

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only a task's own threads may be backgrounded this way. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-sets count as internal; sets on other threads as external. */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* tier_importance is a relative priority in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* A relative priority without a tier makes no sense. */
		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Note: relprio is stored negated (positive magnitude). */
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	/* Flush any deferred policy work accumulated above. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
632
633 /*
634 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
635 * Both result in FIXED mode scheduling.
636 */
637 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)638 convert_policy_to_sched_mode(integer_t policy)
639 {
640 switch (policy) {
641 case POLICY_TIMESHARE:
642 return TH_MODE_TIMESHARE;
643 case POLICY_RR:
644 case POLICY_FIFO:
645 return TH_MODE_FIXED;
646 default:
647 panic("unexpected sched policy: %d", policy);
648 return TH_MODE_NONE;
649 }
650 }
651
652 /*
653 * Called either with the thread mutex locked
654 * or from the pthread kext in a 'safe place'.
655 */
656 static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t thread,sched_mode_t mode,integer_t priority,task_pend_token_t pend_token)657 thread_set_mode_and_absolute_pri_internal(thread_t thread,
658 sched_mode_t mode,
659 integer_t priority,
660 task_pend_token_t pend_token)
661 {
662 kern_return_t kr = KERN_SUCCESS;
663
664 spl_t s = splsched();
665 thread_lock(thread);
666
667 /* This path isn't allowed to change a thread out of realtime. */
668 if ((thread->sched_mode == TH_MODE_REALTIME) ||
669 (thread->saved_mode == TH_MODE_REALTIME)) {
670 kr = KERN_FAILURE;
671 goto unlock;
672 }
673
674 if (thread->policy_reset) {
675 kr = KERN_SUCCESS;
676 goto unlock;
677 }
678
679 sched_mode_t old_mode = thread->sched_mode;
680 integer_t old_base_pri = thread->base_pri;
681 integer_t old_sched_pri = thread->sched_pri;
682
683 /*
684 * Reverse engineer and apply the correct importance value
685 * from the requested absolute priority value.
686 *
687 * TODO: Store the absolute priority value instead
688 */
689
690 if (priority >= thread->max_priority) {
691 priority = thread->max_priority - thread->task_priority;
692 } else if (priority >= MINPRI_KERNEL) {
693 priority -= MINPRI_KERNEL;
694 } else if (priority >= MINPRI_RESERVED) {
695 priority -= MINPRI_RESERVED;
696 } else {
697 priority -= BASEPRI_DEFAULT;
698 }
699
700 priority += thread->task_priority;
701
702 if (priority > thread->max_priority) {
703 priority = thread->max_priority;
704 } else if (priority < MINPRI) {
705 priority = MINPRI;
706 }
707
708 thread->importance = priority - thread->task_priority;
709
710 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
711
712 if (mode != old_mode) {
713 pend_token->tpt_update_thread_sfi = 1;
714 }
715
716 if (thread->base_pri != old_base_pri ||
717 thread->sched_pri != old_sched_pri) {
718 pend_token->tpt_update_turnstile = 1;
719 }
720
721 unlock:
722 thread_unlock(thread);
723 splx(s);
724
725 return kr;
726 }
727
728 void
thread_freeze_base_pri(thread_t thread)729 thread_freeze_base_pri(thread_t thread)
730 {
731 assert(thread == current_thread());
732
733 spl_t s = splsched();
734 thread_lock(thread);
735
736 assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
737 thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
738
739 thread_unlock(thread);
740 splx(s);
741 }
742
743 bool
thread_unfreeze_base_pri(thread_t thread)744 thread_unfreeze_base_pri(thread_t thread)
745 {
746 assert(thread == current_thread());
747 integer_t base_pri;
748 ast_t ast = 0;
749
750 spl_t s = splsched();
751 thread_lock(thread);
752
753 assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
754 thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
755
756 base_pri = thread->req_base_pri;
757 if (base_pri != thread->base_pri) {
758 /*
759 * This function returns "true" if the base pri change
760 * is the most likely cause for the preemption.
761 */
762 sched_set_thread_base_priority(thread, base_pri);
763 ast = ast_peek(AST_PREEMPT);
764 }
765
766 thread_unlock(thread);
767 splx(s);
768
769 return ast != 0;
770 }
771
772 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)773 thread_workq_pri_for_qos(thread_qos_t qos)
774 {
775 assert(qos < THREAD_QOS_LAST);
776 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
777 }
778
779 thread_qos_t
thread_workq_qos_for_pri(int priority)780 thread_workq_qos_for_pri(int priority)
781 {
782 thread_qos_t qos;
783 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
784 // indicate that workq should map >UI threads to workq's
785 // internal notation for above-UI work.
786 return THREAD_QOS_UNSPECIFIED;
787 }
788 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
789 // map a given priority up to the next nearest qos band.
790 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
791 return qos;
792 }
793 }
794 return THREAD_QOS_MAINTENANCE;
795 }
796
797 /*
798 * private interface for pthread workqueues
799 *
800 * Set scheduling policy & absolute priority for thread
801 * May be called with spinlocks held
802 * Thread mutex lock is not held
803 */
804 void
thread_reset_workq_qos(thread_t thread,uint32_t qos)805 thread_reset_workq_qos(thread_t thread, uint32_t qos)
806 {
807 struct task_pend_token pend_token = {};
808
809 assert(qos < THREAD_QOS_LAST);
810
811 spl_t s = splsched();
812 thread_lock(thread);
813
814 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
815 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
816 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
817 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
818 &pend_token);
819
820 assert(pend_token.tpt_update_sockets == 0);
821
822 thread_unlock(thread);
823 splx(s);
824
825 thread_policy_update_complete_unlocked(thread, &pend_token);
826 }
827
828 /*
829 * private interface for pthread workqueues
830 *
831 * Set scheduling policy & absolute priority for thread
832 * May be called with spinlocks held
833 * Thread mutex lock is held
834 */
835 void
thread_set_workq_override(thread_t thread,uint32_t qos)836 thread_set_workq_override(thread_t thread, uint32_t qos)
837 {
838 struct task_pend_token pend_token = {};
839
840 assert(qos < THREAD_QOS_LAST);
841
842 spl_t s = splsched();
843 thread_lock(thread);
844
845 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
846 TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
847
848 assert(pend_token.tpt_update_sockets == 0);
849
850 thread_unlock(thread);
851 splx(s);
852
853 thread_policy_update_complete_unlocked(thread, &pend_token);
854 }
855
856 /*
857 * private interface for pthread workqueues
858 *
859 * Set scheduling policy & absolute priority for thread
860 * May be called with spinlocks held
861 * Thread mutex lock is not held
862 */
863 void
thread_set_workq_pri(thread_t thread,thread_qos_t qos,integer_t priority,integer_t policy)864 thread_set_workq_pri(thread_t thread,
865 thread_qos_t qos,
866 integer_t priority,
867 integer_t policy)
868 {
869 struct task_pend_token pend_token = {};
870 sched_mode_t mode = convert_policy_to_sched_mode(policy);
871
872 assert(qos < THREAD_QOS_LAST);
873 assert(thread->static_param);
874
875 if (!thread->static_param || !thread->active) {
876 return;
877 }
878
879 spl_t s = splsched();
880 thread_lock(thread);
881
882 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
883 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
884 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
885 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
886 0, &pend_token);
887
888 thread_unlock(thread);
889 splx(s);
890
891 /* Concern: this doesn't hold the mutex... */
892
893 __assert_only kern_return_t kr;
894 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
895 &pend_token);
896 assert(kr == KERN_SUCCESS);
897
898 assert(pend_token.tpt_update_sockets == 0);
899
900 thread_policy_update_complete_unlocked(thread, &pend_token);
901 }
902
903 /*
904 * thread_set_mode_and_absolute_pri:
905 *
906 * Set scheduling policy & absolute priority for thread, for deprecated
907 * thread_set_policy and thread_policy interfaces.
908 *
909 * Called with nothing locked.
910 */
911 kern_return_t
thread_set_mode_and_absolute_pri(thread_t thread,integer_t policy,integer_t priority)912 thread_set_mode_and_absolute_pri(thread_t thread,
913 integer_t policy,
914 integer_t priority)
915 {
916 kern_return_t kr = KERN_SUCCESS;
917 struct task_pend_token pend_token = {};
918
919 sched_mode_t mode = convert_policy_to_sched_mode(policy);
920
921 thread_mtx_lock(thread);
922
923 if (!thread->active) {
924 kr = KERN_TERMINATED;
925 goto unlock;
926 }
927
928 if (thread_is_static_param(thread)) {
929 kr = KERN_POLICY_STATIC;
930 goto unlock;
931 }
932
933 /* Setting legacy policies on threads kills the current QoS */
934 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
935 thread_remove_qos_policy_locked(thread, &pend_token);
936 }
937
938 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
939
940 unlock:
941 thread_mtx_unlock(thread);
942
943 thread_policy_update_complete_unlocked(thread, &pend_token);
944
945 return kr;
946 }
947
948 /*
949 * Set the thread's requested mode and recompute priority
950 * Called with thread mutex and thread locked
951 *
952 * TODO: Mitigate potential problems caused by moving thread to end of runq
953 * whenever its priority is recomputed
954 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
955 */
956 static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread,sched_mode_t mode)957 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
958 {
959 if (thread->policy_reset) {
960 return;
961 }
962
963 boolean_t removed = thread_run_queue_remove(thread);
964
965 sched_set_thread_mode_user(thread, mode);
966
967 thread_recompute_priority(thread);
968
969 if (removed) {
970 thread_run_queue_reinsert(thread, SCHED_TAILQ);
971 }
972 }
973
/*
 * called at splsched with thread lock locked
 *
 * Charge the thread's CPU time since the last update to the task's
 * per-QoS time counters, bucketed by both the thread's effective and
 * requested QoS. Counters are updated atomically since the task lock
 * is not held.
 */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate the thread's last context switch or user/kernel
	 * transition (unless precise user/kernel time is disabled).
	 *
	 * TODO: Consider running an update operation here to update it first.
	 * Maybe doable with interrupts disabled from current thread.
	 * If the thread is on a different core, may not be easy to get right.
	 */

	/* Delta = total mach time minus the portion already charged. */
	timer_sum = recount_thread_time_mach(thread);
	timer_delta = timer_sum - thread->vtimer_qos_save;

	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}
1027
1028 /*
1029 * called with no thread locks held
1030 * may hold task lock
1031 */
1032 void
thread_update_qos_cpu_time(thread_t thread)1033 thread_update_qos_cpu_time(thread_t thread)
1034 {
1035 thread_mtx_lock(thread);
1036
1037 spl_t s = splsched();
1038 thread_lock(thread);
1039
1040 thread_update_qos_cpu_time_locked(thread);
1041
1042 thread_unlock(thread);
1043 splx(s);
1044
1045 thread_mtx_unlock(thread);
1046 }
1047
1048 /*
1049 * Calculate base priority from thread attributes, and set it on the thread
1050 *
1051 * Called with thread_lock and thread mutex held.
1052 */
1053 void
thread_recompute_priority(thread_t thread)1054 thread_recompute_priority(
1055 thread_t thread)
1056 {
1057 integer_t priority;
1058 integer_t adj_priority;
1059 bool wi_priority = false;
1060
1061 if (thread->policy_reset) {
1062 return;
1063 }
1064
1065 if (thread->sched_mode == TH_MODE_REALTIME) {
1066 uint8_t i = thread->realtime.priority_offset;
1067 assert((i >= 0) && (i < NRTQS));
1068 priority = BASEPRI_RTQUEUES + i;
1069
1070 sched_set_thread_base_priority(thread, priority);
1071 if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1072 /* Make sure the thread has a valid deadline */
1073 uint64_t ctime = mach_absolute_time();
1074 thread->realtime.deadline = thread->realtime.constraint + ctime;
1075 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1076 (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1077 }
1078 return;
1079
1080 /*
1081 * A thread may have joined a RT work interval but then never
1082 * changed its sched mode or have been demoted. RT work
1083 * intervals will have RT priorities - ignore the priority if
1084 * the thread isn't RT.
1085 */
1086 } else if (thread->effective_policy.thep_wi_driven &&
1087 work_interval_get_priority(thread) < BASEPRI_RTQUEUES) {
1088 priority = work_interval_get_priority(thread);
1089 wi_priority = true;
1090 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1091 int qos = thread->effective_policy.thep_qos;
1092 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1093 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1094 int qos_scaled_relprio;
1095
1096 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1097 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1098
1099 priority = thread_qos_policy_params.qos_pri[qos];
1100 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1101
1102 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1103 /* Bump priority 46 to 47 when in a frontmost app */
1104 qos_scaled_relprio += 1;
1105 }
1106
1107 /* TODO: factor in renice priority here? */
1108
1109 priority += qos_scaled_relprio;
1110 } else {
1111 if (thread->importance > MAXPRI) {
1112 priority = MAXPRI;
1113 } else if (thread->importance < -MAXPRI) {
1114 priority = -MAXPRI;
1115 } else {
1116 priority = thread->importance;
1117 }
1118
1119 priority += thread->task_priority;
1120 }
1121
1122 /* Boost the priority of threads which are RT demoted. */
1123 if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
1124 priority = MAX(priority, MAXPRI_USER);
1125 }
1126
1127 priority = MAX(priority, thread->user_promotion_basepri);
1128
1129 /*
1130 * Clamp priority back into the allowed range for this task.
1131 * The initial priority value could be out of this range due to:
1132 * Task clamped to BG or Utility (max-pri is 4, or 20)
1133 * Task is user task (max-pri is 63)
1134 * Task is kernel task (max-pri is 95)
1135 * Note that thread->importance is user-settable to any integer
1136 * via THREAD_PRECEDENCE_POLICY.
1137 */
1138 adj_priority = priority;
1139 adj_priority = MIN(adj_priority, thread->max_priority);
1140 adj_priority = MAX(adj_priority, MINPRI);
1141
1142 /* Allow workload driven priorities to exceed max_priority. */
1143 if (wi_priority) {
1144 adj_priority = MAX(adj_priority, priority);
1145 }
1146
1147 /* Allow priority to exceed max_priority for promotions. */
1148 if (thread->effective_policy.thep_promote_above_task) {
1149 adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
1150 }
1151 priority = adj_priority;
1152 assert3u(priority, <=, MAXPRI);
1153
1154 if (thread->saved_mode == TH_MODE_REALTIME &&
1155 sched_thread_mode_has_demotion(thread, TH_SFLAG_FAILSAFE)) {
1156 priority = DEPRESSPRI;
1157 }
1158
1159 if (thread->effective_policy.thep_terminated == TRUE) {
1160 /*
1161 * We temporarily want to override the expected priority to
1162 * ensure that the thread exits in a timely manner.
1163 * Note that this is allowed to exceed thread->max_priority
1164 * so that the thread is no longer clamped to background
1165 * during the final exit phase.
1166 */
1167 if (priority < thread->task_priority) {
1168 priority = thread->task_priority;
1169 }
1170 if (priority < BASEPRI_DEFAULT) {
1171 priority = BASEPRI_DEFAULT;
1172 }
1173 }
1174
1175 #if !defined(XNU_TARGET_OS_OSX)
1176 /* No one can have a base priority less than MAXPRI_THROTTLE */
1177 if (priority < MAXPRI_THROTTLE) {
1178 priority = MAXPRI_THROTTLE;
1179 }
1180 #endif /* !defined(XNU_TARGET_OS_OSX) */
1181
1182 sched_set_thread_base_priority(thread, priority);
1183 }
1184
/* Called with the task lock held, but not the thread mutex or spinlock */
void
thread_policy_update_tasklocked(
	thread_t thread,
	integer_t priority,
	integer_t max_priority,
	task_pend_token_t pend_token)
{
	/*
	 * Push new task-derived base/max priorities onto a thread and re-run
	 * the thread policy engine.  Side effects that cannot be performed
	 * under the locks are accumulated into pend_token for the caller.
	 */
	thread_mtx_lock(thread);

	if (!thread->active || thread->policy_reset) {
		/* Thread is terminating or already reset; nothing to update. */
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * NOTE(review): marked __unused although it is read by the demotion
	 * check below — presumably kept for configurations where that check
	 * is compiled out; the attribute is permissive either way.
	 */
	__unused
	integer_t old_max_priority = thread->max_priority;

	/* Both values land in int16_t fields; range-check before narrowing. */
	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1232
1233 /*
1234 * Reset thread to default state in preparation for termination
1235 * Called with thread mutex locked
1236 *
1237 * Always called on current thread, so we don't need a run queue remove
1238 */
1239 void
thread_policy_reset(thread_t thread)1240 thread_policy_reset(
1241 thread_t thread)
1242 {
1243 spl_t s;
1244
1245 assert(thread == current_thread());
1246
1247 s = splsched();
1248 thread_lock(thread);
1249
1250 if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1251 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1252 }
1253
1254 if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1255 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1256 }
1257
1258 if (thread->sched_flags & TH_SFLAG_RT_DISALLOWED) {
1259 sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
1260 }
1261
1262 /* At this point, the various demotions should be inactive */
1263 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1264 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1265
1266 /* Reset thread back to task-default basepri and mode */
1267 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));
1268
1269 sched_set_thread_mode(thread, newmode);
1270
1271 thread->importance = 0;
1272
1273 /* Prevent further changes to thread base priority or mode */
1274 thread->policy_reset = 1;
1275
1276 sched_set_thread_base_priority(thread, thread->task_priority);
1277
1278 thread_unlock(thread);
1279 splx(s);
1280 }
1281
/*
 * Fetch one thread policy flavor into policy_info/count.
 *
 * If *get_default is TRUE on entry — or the thread turns out to have no
 * explicit setting for the flavor, in which case it is set TRUE here —
 * default values are returned instead of the thread's current state.
 *
 * Returns KERN_INVALID_ARGUMENT for a null thread, bad flavor, or a count
 * too small for the flavor; KERN_TERMINATED for an inactive thread.
 */
kern_return_t
thread_policy_get(
	thread_t thread,
	thread_policy_flavor_t flavor,
	thread_policy_t policy_info,
	mach_msg_type_number_t *count,
	boolean_t *get_default)
{
	kern_return_t result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/*
			 * Report the user-visible mode: for a demoted thread,
			 * saved_mode is what the user actually asked for.
			 */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				/* Realtime threads have no extended-policy setting. */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/* The WITH_PRIORITY variant requires room for the extra field. */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/* saved_mode covers RT threads currently demoted. */
			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Raw bit images of the requested/effective policy structs. */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions = 0;
			info->thps_user_promotion_basepri = thread->user_promotion_basepri;
			info->thps_ipc_overrides = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_REQUESTED_STATE_POLICY:
	{
		if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * NOTE(review): unlike the other flavors, this reads
		 * requested_policy under only the thread mutex (no splsched/
		 * thread_lock) and ignores *get_default — confirm intentional.
		 */
		thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
		struct thread_requested_policy *req_policy = &thread->requested_policy;

		info->thrq_base_qos = req_policy->thrp_qos;
		info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
		info->thrq_qos_override = req_policy->thrp_qos_override;
		info->thrq_qos_promote = req_policy->thrp_qos_promote;
		info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
		info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
		info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		/* Convert the internal value back to the Mach tier encoding. */
		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		/* Convert the internal value back to the Mach tier encoding. */
		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* relprio is stored inverted; undo that for the caller. */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1576
1577 void
thread_policy_create(thread_t thread)1578 thread_policy_create(thread_t thread)
1579 {
1580 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1581 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1582 thread_tid(thread), theffective_0(thread),
1583 theffective_1(thread), thread->base_pri, 0);
1584
1585 /* We pass a pend token but ignore it */
1586 struct task_pend_token pend_token = {};
1587
1588 thread_policy_update_internal_spinlocked(thread, true, &pend_token);
1589
1590 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1591 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1592 thread_tid(thread), theffective_0(thread),
1593 theffective_1(thread), thread->base_pri, 0);
1594 }
1595
1596 static void
thread_policy_update_spinlocked(thread_t thread,bool recompute_priority,task_pend_token_t pend_token)1597 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
1598 {
1599 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1600 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1601 thread_tid(thread), theffective_0(thread),
1602 theffective_1(thread), thread->base_pri, 0);
1603
1604 thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1605
1606 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1607 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1608 thread_tid(thread), theffective_0(thread),
1609 theffective_1(thread), thread->base_pri, 0);
1610 }
1611
1612
1613
1614 /*
1615 * One thread state update function TO RULE THEM ALL
1616 *
1617 * This function updates the thread effective policy fields
1618 * and pushes the results to the relevant subsystems.
1619 *
1620 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1621 */
1622 static void
thread_policy_update_internal_spinlocked(thread_t thread,bool recompute_priority,task_pend_token_t pend_token)1623 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
1624 task_pend_token_t pend_token)
1625 {
1626 /*
1627 * Step 1:
1628 * Gather requested policy and effective task state
1629 */
1630
1631 const struct thread_requested_policy requested = thread->requested_policy;
1632 const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;
1633
1634 /*
1635 * Step 2:
1636 * Calculate new effective policies from requested policy, task and thread state
1637 * Rules:
1638 * Don't change requested, it won't take effect
1639 */
1640
1641 struct thread_effective_policy next = {};
1642
1643 next.thep_wi_driven = requested.thrp_wi_driven;
1644
1645 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1646
1647 uint32_t next_qos = requested.thrp_qos;
1648
1649 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1650 next_qos = MAX(requested.thrp_qos_override, next_qos);
1651 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1652 next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1653 next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1654 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1655 }
1656
1657 if (task_effective.tep_darwinbg && task_effective.tep_promote_above_task &&
1658 requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
1659 /*
1660 * This thread is turnstile-boosted higher than the background clamp
1661 * by a synchronous waiter, and this clamp allows that to override the
1662 * clamp temporarily for this thread only.
1663 */
1664 next.thep_promote_above_task = true;
1665 next_qos = requested.thrp_qos_promote;
1666 }
1667
1668 next.thep_qos = next_qos;
1669
1670 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1671 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1672 if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1673 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1674 } else {
1675 next.thep_qos = task_effective.tep_qos_clamp;
1676 }
1677 next.thep_wi_driven = 0;
1678 }
1679
1680 /*
1681 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1682 * This allows QoS promotions to work properly even after the process is unclamped.
1683 */
1684 next.thep_qos_promote = next.thep_qos;
1685
1686 /* The ceiling only applies to threads that are in the QoS world */
1687 /* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
1688 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1689 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1690 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1691 }
1692
1693 /*
1694 * The QoS relative priority is only applicable when the original programmer's
1695 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1696 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1697 * since otherwise it would be lower than unclamped threads. Similarly, in the
1698 * presence of boosting, the programmer doesn't know what other actors
1699 * are boosting the thread.
1700 */
1701 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1702 (requested.thrp_qos == next.thep_qos) &&
1703 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1704 next.thep_qos_relprio = requested.thrp_qos_relprio;
1705 } else {
1706 next.thep_qos_relprio = 0;
1707 }
1708
1709 /* Calculate DARWIN_BG */
1710 bool wants_darwinbg = false;
1711 bool wants_all_sockets_bg = false; /* Do I want my existing sockets to be bg */
1712
1713 if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
1714 wants_darwinbg = true;
1715 }
1716
1717 /*
1718 * If DARWIN_BG has been requested at either level, it's engaged.
1719 * darwinbg threads always create bg sockets,
1720 * but only some types of darwinbg change the sockets
1721 * after they're created
1722 */
1723 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1724 wants_all_sockets_bg = wants_darwinbg = true;
1725 }
1726
1727 if (requested.thrp_pidbind_bg) {
1728 wants_all_sockets_bg = wants_darwinbg = true;
1729 }
1730
1731 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1732 next.thep_qos == THREAD_QOS_MAINTENANCE) {
1733 wants_darwinbg = true;
1734 }
1735
1736 /* Calculate side effects of DARWIN_BG */
1737
1738 if (wants_darwinbg) {
1739 next.thep_darwinbg = 1;
1740 next.thep_wi_driven = 0;
1741 }
1742
1743 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1744 next.thep_new_sockets_bg = 1;
1745 }
1746
1747 /* Don't use task_effective.tep_all_sockets_bg here */
1748 if (wants_all_sockets_bg) {
1749 next.thep_all_sockets_bg = 1;
1750 }
1751
1752 /* darwinbg implies background QOS (or lower) */
1753 if (next.thep_darwinbg &&
1754 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1755 next.thep_qos = THREAD_QOS_BACKGROUND;
1756 next.thep_qos_relprio = 0;
1757 }
1758
1759 /* Calculate IO policy */
1760
1761 int iopol = THROTTLE_LEVEL_TIER0;
1762
1763 /* Factor in the task's IO policy */
1764 if (next.thep_darwinbg) {
1765 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1766 }
1767
1768 if (!next.thep_promote_above_task) {
1769 iopol = MAX(iopol, task_effective.tep_io_tier);
1770 }
1771
1772 /* Look up the associated IO tier value for the QoS class */
1773 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1774
1775 iopol = MAX(iopol, requested.thrp_int_iotier);
1776 iopol = MAX(iopol, requested.thrp_ext_iotier);
1777
1778 /* Apply the kevent iotier override */
1779 iopol = MIN(iopol, requested.thrp_iotier_kevent_override);
1780
1781 next.thep_io_tier = iopol;
1782
1783 /*
1784 * If a QoS override is causing IO to go into a lower tier, we also set
1785 * the passive bit so that a thread doesn't end up stuck in its own throttle
1786 * window when the override goes away.
1787 */
1788
1789 int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
1790 int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
1791 bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
1792
1793 /* Calculate Passive IO policy */
1794 if (requested.thrp_ext_iopassive ||
1795 requested.thrp_int_iopassive ||
1796 qos_io_override_active ||
1797 task_effective.tep_io_passive) {
1798 next.thep_io_passive = 1;
1799 }
1800
1801 /* Calculate timer QOS */
1802 uint32_t latency_qos = requested.thrp_latency_qos;
1803
1804 if (!next.thep_promote_above_task) {
1805 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1806 }
1807
1808 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1809
1810 next.thep_latency_qos = latency_qos;
1811
1812 /* Calculate throughput QOS */
1813 uint32_t through_qos = requested.thrp_through_qos;
1814
1815 if (!next.thep_promote_above_task) {
1816 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1817 }
1818
1819 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1820
1821 next.thep_through_qos = through_qos;
1822
1823 if (task_effective.tep_terminated || requested.thrp_terminated) {
1824 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1825 next.thep_terminated = 1;
1826 next.thep_darwinbg = 0;
1827 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1828 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1829 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1830 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1831 next.thep_wi_driven = 0;
1832 }
1833
1834 /*
1835 * Step 3:
1836 * Swap out old policy for new policy
1837 */
1838
1839 struct thread_effective_policy prev = thread->effective_policy;
1840
1841 thread_update_qos_cpu_time_locked(thread);
1842
1843 /* This is the point where the new values become visible to other threads */
1844 thread->effective_policy = next;
1845
1846 /*
1847 * Step 4:
1848 * Pend updates that can't be done while holding the thread lock
1849 */
1850
1851 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1852 pend_token->tpt_update_sockets = 1;
1853 }
1854
1855 /* TODO: Doesn't this only need to be done if the throttle went up? */
1856 if (prev.thep_io_tier != next.thep_io_tier) {
1857 pend_token->tpt_update_throttle = 1;
1858 }
1859
1860 /*
1861 * Check for the attributes that sfi_thread_classify() consults,
1862 * and trigger SFI re-evaluation.
1863 */
1864 if (prev.thep_qos != next.thep_qos ||
1865 prev.thep_darwinbg != next.thep_darwinbg) {
1866 pend_token->tpt_update_thread_sfi = 1;
1867 }
1868
1869 integer_t old_base_pri = thread->base_pri;
1870
1871 /* promote-above-task generates its own dedicated tracepoint */
1872 if (prev.thep_promote_above_task != next.thep_promote_above_task) {
1873 KDBG_RELEASE(IMPORTANCE_CODE(IMP_THREAD_PROMOTE_ABOVE_TASK, 0) |
1874 (next.thep_promote_above_task ? DBG_FUNC_START : DBG_FUNC_END),
1875 thread_tid(thread), next.thep_terminated);
1876 }
1877
1878 /*
1879 * Step 5:
1880 * Update other subsystems as necessary if something has changed
1881 */
1882
1883 /* Check for the attributes that thread_recompute_priority() consults */
1884 if (prev.thep_qos != next.thep_qos ||
1885 prev.thep_qos_relprio != next.thep_qos_relprio ||
1886 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1887 prev.thep_promote_above_task != next.thep_promote_above_task ||
1888 prev.thep_terminated != next.thep_terminated ||
1889 prev.thep_wi_driven != next.thep_wi_driven ||
1890 pend_token->tpt_force_recompute_pri == 1 ||
1891 recompute_priority) {
1892 thread_recompute_priority(thread);
1893 }
1894
1895 /*
1896 * Check if the thread is waiting on a turnstile and needs priority propagation.
1897 */
1898 if (pend_token->tpt_update_turnstile &&
1899 ((old_base_pri == thread->base_pri) ||
1900 !thread_get_waiting_turnstile(thread))) {
1901 /*
1902 * Reset update turnstile pend token since either
1903 * the thread priority did not change or thread is
1904 * not blocked on a turnstile.
1905 */
1906 pend_token->tpt_update_turnstile = 0;
1907 }
1908 }
1909
1910
1911 /*
1912 * Initiate a thread policy state transition on a thread with its TID
1913 * Useful if you cannot guarantee the thread won't get terminated
1914 * Precondition: No locks are held
1915 * Will take task lock - using the non-tid variant is faster
1916 * if you already have a thread ref.
1917 */
1918 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1919 proc_set_thread_policy_with_tid(task_t task,
1920 uint64_t tid,
1921 int category,
1922 int flavor,
1923 int value)
1924 {
1925 /* takes task lock, returns ref'ed thread or NULL */
1926 thread_t thread = task_findtid(task, tid);
1927
1928 if (thread == THREAD_NULL) {
1929 return;
1930 }
1931
1932 proc_set_thread_policy(thread, category, flavor, value);
1933
1934 thread_deallocate(thread);
1935 }
1936
1937 /*
1938 * Initiate a thread policy transition on a thread
1939 * This path supports networking transitions (i.e. darwinbg transitions)
1940 * Precondition: No locks are held
1941 */
1942 void
proc_set_thread_policy(thread_t thread,int category,int flavor,int value)1943 proc_set_thread_policy(thread_t thread,
1944 int category,
1945 int flavor,
1946 int value)
1947 {
1948 proc_set_thread_policy_ext(thread, category, flavor, value, 0);
1949 }
1950
1951 void
proc_set_thread_policy_ext(thread_t thread,int category,int flavor,int value,int value2)1952 proc_set_thread_policy_ext(thread_t thread,
1953 int category,
1954 int flavor,
1955 int value,
1956 int value2)
1957 {
1958 struct task_pend_token pend_token = {};
1959
1960 thread_mtx_lock(thread);
1961
1962 proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);
1963
1964 thread_mtx_unlock(thread);
1965
1966 thread_policy_update_complete_unlocked(thread, &pend_token);
1967 }
1968
1969 /*
1970 * Do the things that can't be done while holding a thread mutex.
1971 * These are set up to call back into thread policy to get the latest value,
1972 * so they don't have to be synchronized with the update.
1973 * The only required semantic is 'call this sometime after updating effective policy'
1974 *
1975 * Precondition: Thread mutex is not held
1976 *
1977 * This may be called with the task lock held, but in that case it won't be
1978 * called with tpt_update_sockets set.
1979 */
1980 void
thread_policy_update_complete_unlocked(thread_t thread,task_pend_token_t pend_token)1981 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1982 {
1983 #ifdef MACH_BSD
1984 if (pend_token->tpt_update_sockets) {
1985 proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
1986 }
1987 #endif /* MACH_BSD */
1988
1989 if (pend_token->tpt_update_throttle) {
1990 rethrottle_thread(get_bsdthread_info(thread));
1991 }
1992
1993 if (pend_token->tpt_update_thread_sfi) {
1994 sfi_reevaluate(thread);
1995 }
1996
1997 if (pend_token->tpt_update_turnstile) {
1998 turnstile_update_thread_priority_chain(thread);
1999 }
2000 }
2001
2002 /*
2003 * Set and update thread policy
2004 * Thread mutex might be held
2005 */
2006 static void
proc_set_thread_policy_locked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)2007 proc_set_thread_policy_locked(thread_t thread,
2008 int category,
2009 int flavor,
2010 int value,
2011 int value2,
2012 task_pend_token_t pend_token)
2013 {
2014 spl_t s = splsched();
2015 thread_lock(thread);
2016
2017 proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2018
2019 thread_unlock(thread);
2020 splx(s);
2021 }
2022
2023 /*
2024 * Set and update thread policy
2025 * Thread spinlock is held
2026 */
2027 static void
proc_set_thread_policy_spinlocked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)2028 proc_set_thread_policy_spinlocked(thread_t thread,
2029 int category,
2030 int flavor,
2031 int value,
2032 int value2,
2033 task_pend_token_t pend_token)
2034 {
2035 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2036 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
2037 thread_tid(thread), threquested_0(thread),
2038 threquested_1(thread), value, 0);
2039
2040 thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2041
2042 thread_policy_update_spinlocked(thread, false, pend_token);
2043
2044 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2045 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
2046 thread_tid(thread), threquested_0(thread),
2047 threquested_1(thread), tpending(pend_token), 0);
2048 }
2049
2050 /*
2051 * Set the requested state for a specific flavor to a specific value.
2052 */
2053 static void
thread_set_requested_policy_spinlocked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)2054 thread_set_requested_policy_spinlocked(thread_t thread,
2055 int category,
2056 int flavor,
2057 int value,
2058 int value2,
2059 task_pend_token_t pend_token)
2060 {
2061 int tier, passive;
2062
2063 struct thread_requested_policy requested = thread->requested_policy;
2064
2065 switch (flavor) {
2066 /* Category: EXTERNAL and INTERNAL, thread and task */
2067
2068 case TASK_POLICY_DARWIN_BG:
2069 if (category == TASK_POLICY_EXTERNAL) {
2070 requested.thrp_ext_darwinbg = value;
2071 } else {
2072 requested.thrp_int_darwinbg = value;
2073 }
2074 pend_token->tpt_update_turnstile = 1;
2075 break;
2076
2077 case TASK_POLICY_IOPOL:
2078 proc_iopol_to_tier(value, &tier, &passive);
2079 if (category == TASK_POLICY_EXTERNAL) {
2080 requested.thrp_ext_iotier = tier;
2081 requested.thrp_ext_iopassive = passive;
2082 } else {
2083 requested.thrp_int_iotier = tier;
2084 requested.thrp_int_iopassive = passive;
2085 }
2086 break;
2087
2088 case TASK_POLICY_IO:
2089 if (category == TASK_POLICY_EXTERNAL) {
2090 requested.thrp_ext_iotier = value;
2091 } else {
2092 requested.thrp_int_iotier = value;
2093 }
2094 break;
2095
2096 case TASK_POLICY_PASSIVE_IO:
2097 if (category == TASK_POLICY_EXTERNAL) {
2098 requested.thrp_ext_iopassive = value;
2099 } else {
2100 requested.thrp_int_iopassive = value;
2101 }
2102 break;
2103
2104 /* Category: ATTRIBUTE, thread only */
2105
2106 case TASK_POLICY_PIDBIND_BG:
2107 assert(category == TASK_POLICY_ATTRIBUTE);
2108 requested.thrp_pidbind_bg = value;
2109 pend_token->tpt_update_turnstile = 1;
2110 break;
2111
2112 case TASK_POLICY_LATENCY_QOS:
2113 assert(category == TASK_POLICY_ATTRIBUTE);
2114 requested.thrp_latency_qos = value;
2115 break;
2116
2117 case TASK_POLICY_THROUGH_QOS:
2118 assert(category == TASK_POLICY_ATTRIBUTE);
2119 requested.thrp_through_qos = value;
2120 break;
2121
2122 case TASK_POLICY_QOS_OVERRIDE:
2123 assert(category == TASK_POLICY_ATTRIBUTE);
2124 requested.thrp_qos_override = value;
2125 pend_token->tpt_update_turnstile = 1;
2126 break;
2127
2128 case TASK_POLICY_QOS_AND_RELPRIO:
2129 assert(category == TASK_POLICY_ATTRIBUTE);
2130 requested.thrp_qos = value;
2131 requested.thrp_qos_relprio = value2;
2132 pend_token->tpt_update_turnstile = 1;
2133 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
2134 break;
2135
2136 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2137 assert(category == TASK_POLICY_ATTRIBUTE);
2138 requested.thrp_qos_workq_override = value;
2139 pend_token->tpt_update_turnstile = 1;
2140 break;
2141
2142 case TASK_POLICY_QOS_PROMOTE:
2143 assert(category == TASK_POLICY_ATTRIBUTE);
2144 requested.thrp_qos_promote = value;
2145 break;
2146
2147 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2148 assert(category == TASK_POLICY_ATTRIBUTE);
2149 requested.thrp_qos_kevent_override = value;
2150 pend_token->tpt_update_turnstile = 1;
2151 break;
2152
2153 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2154 assert(category == TASK_POLICY_ATTRIBUTE);
2155 requested.thrp_qos_wlsvc_override = value;
2156 pend_token->tpt_update_turnstile = 1;
2157 break;
2158
2159 case TASK_POLICY_TERMINATED:
2160 assert(category == TASK_POLICY_ATTRIBUTE);
2161 requested.thrp_terminated = value;
2162 break;
2163
2164 case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
2165 assert(category == TASK_POLICY_ATTRIBUTE);
2166 requested.thrp_iotier_kevent_override = value;
2167 break;
2168
2169 case TASK_POLICY_WI_DRIVEN:
2170 assert(category == TASK_POLICY_ATTRIBUTE);
2171 assert(thread == current_thread());
2172
2173 const bool set_policy = value;
2174 const sched_mode_t mode = value2;
2175
2176 requested.thrp_wi_driven = set_policy ? 1 : 0;
2177
2178 /*
2179 * No sched mode change for REALTIME (threads must explicitly
2180 * opt-in), however the priority_offset needs to be updated.
2181 */
2182 if (mode == TH_MODE_REALTIME) {
2183 const int pri = work_interval_get_priority(thread);
2184 assert3u(pri, >=, BASEPRI_RTQUEUES);
2185 thread->realtime.priority_offset = set_policy ?
2186 (uint8_t)(pri - BASEPRI_RTQUEUES) : 0;
2187 } else {
2188 sched_set_thread_mode_user(thread, mode);
2189 if (set_policy) {
2190 thread->static_param = true;
2191 }
2192 }
2193 break;
2194
2195 default:
2196 panic("unknown task policy: %d %d %d", category, flavor, value);
2197 break;
2198 }
2199
2200 thread->requested_policy = requested;
2201 }
2202
2203 /*
2204 * Gets what you set. Effective values may be different.
2205 * Precondition: No locks are held
2206 */
2207 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2208 proc_get_thread_policy(thread_t thread,
2209 int category,
2210 int flavor)
2211 {
2212 int value = 0;
2213 thread_mtx_lock(thread);
2214 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2215 thread_mtx_unlock(thread);
2216 return value;
2217 }
2218
2219 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2220 proc_get_thread_policy_locked(thread_t thread,
2221 int category,
2222 int flavor,
2223 int* value2)
2224 {
2225 int value = 0;
2226
2227 spl_t s = splsched();
2228 thread_lock(thread);
2229
2230 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2231
2232 thread_unlock(thread);
2233 splx(s);
2234
2235 return value;
2236 }
2237
2238 /*
2239 * Gets what you set. Effective values may be different.
2240 */
2241 static int
thread_get_requested_policy_spinlocked(thread_t thread,int category,int flavor,int * value2)2242 thread_get_requested_policy_spinlocked(thread_t thread,
2243 int category,
2244 int flavor,
2245 int* value2)
2246 {
2247 int value = 0;
2248
2249 struct thread_requested_policy requested = thread->requested_policy;
2250
2251 switch (flavor) {
2252 case TASK_POLICY_DARWIN_BG:
2253 if (category == TASK_POLICY_EXTERNAL) {
2254 value = requested.thrp_ext_darwinbg;
2255 } else {
2256 value = requested.thrp_int_darwinbg;
2257 }
2258 break;
2259 case TASK_POLICY_IOPOL:
2260 if (category == TASK_POLICY_EXTERNAL) {
2261 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2262 requested.thrp_ext_iopassive);
2263 } else {
2264 value = proc_tier_to_iopol(requested.thrp_int_iotier,
2265 requested.thrp_int_iopassive);
2266 }
2267 break;
2268 case TASK_POLICY_IO:
2269 if (category == TASK_POLICY_EXTERNAL) {
2270 value = requested.thrp_ext_iotier;
2271 } else {
2272 value = requested.thrp_int_iotier;
2273 }
2274 break;
2275 case TASK_POLICY_PASSIVE_IO:
2276 if (category == TASK_POLICY_EXTERNAL) {
2277 value = requested.thrp_ext_iopassive;
2278 } else {
2279 value = requested.thrp_int_iopassive;
2280 }
2281 break;
2282 case TASK_POLICY_QOS:
2283 assert(category == TASK_POLICY_ATTRIBUTE);
2284 value = requested.thrp_qos;
2285 break;
2286 case TASK_POLICY_QOS_OVERRIDE:
2287 assert(category == TASK_POLICY_ATTRIBUTE);
2288 value = requested.thrp_qos_override;
2289 break;
2290 case TASK_POLICY_LATENCY_QOS:
2291 assert(category == TASK_POLICY_ATTRIBUTE);
2292 value = requested.thrp_latency_qos;
2293 break;
2294 case TASK_POLICY_THROUGH_QOS:
2295 assert(category == TASK_POLICY_ATTRIBUTE);
2296 value = requested.thrp_through_qos;
2297 break;
2298 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2299 assert(category == TASK_POLICY_ATTRIBUTE);
2300 value = requested.thrp_qos_workq_override;
2301 break;
2302 case TASK_POLICY_QOS_AND_RELPRIO:
2303 assert(category == TASK_POLICY_ATTRIBUTE);
2304 assert(value2 != NULL);
2305 value = requested.thrp_qos;
2306 *value2 = requested.thrp_qos_relprio;
2307 break;
2308 case TASK_POLICY_QOS_PROMOTE:
2309 assert(category == TASK_POLICY_ATTRIBUTE);
2310 value = requested.thrp_qos_promote;
2311 break;
2312 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2313 assert(category == TASK_POLICY_ATTRIBUTE);
2314 value = requested.thrp_qos_kevent_override;
2315 break;
2316 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2317 assert(category == TASK_POLICY_ATTRIBUTE);
2318 value = requested.thrp_qos_wlsvc_override;
2319 break;
2320 case TASK_POLICY_TERMINATED:
2321 assert(category == TASK_POLICY_ATTRIBUTE);
2322 value = requested.thrp_terminated;
2323 break;
2324 case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
2325 assert(category == TASK_POLICY_ATTRIBUTE);
2326 value = requested.thrp_iotier_kevent_override;
2327 break;
2328
2329 case TASK_POLICY_WI_DRIVEN:
2330 assert(category == TASK_POLICY_ATTRIBUTE);
2331 value = requested.thrp_wi_driven;
2332 break;
2333
2334 default:
2335 panic("unknown policy_flavor %d", flavor);
2336 break;
2337 }
2338
2339 return value;
2340 }
2341
2342 /*
2343 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2344 *
2345 * NOTE: This accessor does not take the task or thread lock.
2346 * Notifications of state updates need to be externally synchronized with state queries.
2347 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2348 * within the context of a timer interrupt.
2349 *
2350 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2351 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2352 * I don't think that cost is worth not having the right answer.
2353 */
2354 int
proc_get_effective_thread_policy(thread_t thread,int flavor)2355 proc_get_effective_thread_policy(thread_t thread,
2356 int flavor)
2357 {
2358 int value = 0;
2359
2360 switch (flavor) {
2361 case TASK_POLICY_DARWIN_BG:
2362 /*
2363 * This call is used within the timer layer, as well as
2364 * prioritizing requests to the graphics system.
2365 * It also informs SFI and originator-bg-state.
2366 * Returns 1 for background mode, 0 for normal mode
2367 */
2368
2369 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2370 break;
2371 case TASK_POLICY_IO:
2372 /*
2373 * The I/O system calls here to find out what throttling tier to apply to an operation.
2374 * Returns THROTTLE_LEVEL_* values
2375 */
2376 value = thread->effective_policy.thep_io_tier;
2377 if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2378 value = MIN(value, thread->iotier_override);
2379 }
2380 break;
2381 case TASK_POLICY_PASSIVE_IO:
2382 /*
2383 * The I/O system calls here to find out whether an operation should be passive.
2384 * (i.e. not cause operations with lower throttle tiers to be throttled)
2385 * Returns 1 for passive mode, 0 for normal mode
2386 *
2387 * If an override is causing IO to go into a lower tier, we also set
2388 * the passive bit so that a thread doesn't end up stuck in its own throttle
2389 * window when the override goes away.
2390 */
2391 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2392 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2393 thread->iotier_override < thread->effective_policy.thep_io_tier) {
2394 value = 1;
2395 }
2396 break;
2397 case TASK_POLICY_ALL_SOCKETS_BG:
2398 /*
2399 * do_background_socket() calls this to determine whether
2400 * it should change the thread's sockets
2401 * Returns 1 for background mode, 0 for normal mode
2402 * This consults both thread and task so un-DBGing a thread while the task is BG
2403 * doesn't get you out of the network throttle.
2404 */
2405 value = (thread->effective_policy.thep_all_sockets_bg ||
2406 get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2407 break;
2408 case TASK_POLICY_NEW_SOCKETS_BG:
2409 /*
2410 * socreate() calls this to determine if it should mark a new socket as background
2411 * Returns 1 for background mode, 0 for normal mode
2412 */
2413 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2414 break;
2415 case TASK_POLICY_LATENCY_QOS:
2416 /*
2417 * timer arming calls into here to find out the timer coalescing level
2418 * Returns a latency QoS tier (0-6)
2419 */
2420 value = thread->effective_policy.thep_latency_qos;
2421 break;
2422 case TASK_POLICY_THROUGH_QOS:
2423 /*
2424 * This value is passed into the urgency callout from the scheduler
2425 * to the performance management subsystem.
2426 *
2427 * Returns a throughput QoS tier (0-6)
2428 */
2429 value = thread->effective_policy.thep_through_qos;
2430 break;
2431 case TASK_POLICY_QOS:
2432 /*
2433 * This is communicated to the performance management layer and SFI.
2434 *
2435 * Returns a QoS policy tier
2436 */
2437 value = thread->effective_policy.thep_qos;
2438 break;
2439 default:
2440 panic("unknown thread policy flavor %d", flavor);
2441 break;
2442 }
2443
2444 return value;
2445 }
2446
2447
2448 /*
2449 * (integer_t) casts limit the number of bits we can fit here
2450 * this interface is deprecated and replaced by the _EXT struct ?
2451 */
2452 static void
proc_get_thread_policy_bitfield(thread_t thread,thread_policy_state_t info)2453 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2454 {
2455 uint64_t bits = 0;
2456 struct thread_requested_policy requested = thread->requested_policy;
2457
2458 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2459 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2460 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2461 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2462 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2463 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2464
2465 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2466 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2467
2468 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2469
2470 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2471 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2472
2473 info->requested = (integer_t) bits;
2474 bits = 0;
2475
2476 struct thread_effective_policy effective = thread->effective_policy;
2477
2478 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2479
2480 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2481 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2482 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2483 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2484
2485 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2486
2487 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2488 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2489
2490 info->effective = (integer_t)bits;
2491 bits = 0;
2492
2493 info->pending = 0;
2494 }
2495
2496 /*
2497 * Sneakily trace either the task and thread requested
2498 * or just the thread requested, depending on if we have enough room.
2499 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2500 *
2501 * LP32 LP64
2502 * threquested_0(thread) thread[0] task[0]
2503 * threquested_1(thread) thread[1] thread[0]
2504 *
2505 */
2506
2507 uintptr_t
threquested_0(thread_t thread)2508 threquested_0(thread_t thread)
2509 {
2510 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2511
2512 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2513
2514 return raw[0];
2515 }
2516
2517 uintptr_t
threquested_1(thread_t thread)2518 threquested_1(thread_t thread)
2519 {
2520 #if defined __LP64__
2521 return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
2522 #else
2523 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2524 return raw[1];
2525 #endif
2526 }
2527
2528 uintptr_t
theffective_0(thread_t thread)2529 theffective_0(thread_t thread)
2530 {
2531 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2532
2533 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2534 return raw[0];
2535 }
2536
2537 uintptr_t
theffective_1(thread_t thread)2538 theffective_1(thread_t thread)
2539 {
2540 #if defined __LP64__
2541 return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
2542 #else
2543 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2544 return raw[1];
2545 #endif
2546 }
2547
2548
2549 /*
2550 * Set an override on the thread which is consulted with a
2551 * higher priority than the task/thread policy. This should
2552 * only be set for temporary grants until the thread
2553 * returns to the userspace boundary
2554 *
2555 * We use atomic operations to swap in the override, with
2556 * the assumption that the thread itself can
2557 * read the override and clear it on return to userspace.
2558 *
2559 * No locking is performed, since it is acceptable to see
2560 * a stale override for one loop through throttle_lowpri_io().
2561 * However a thread reference must be held on the thread.
2562 */
2563
2564 void
set_thread_iotier_override(thread_t thread,int policy)2565 set_thread_iotier_override(thread_t thread, int policy)
2566 {
2567 int current_override;
2568
2569 /* Let most aggressive I/O policy win until user boundary */
2570 do {
2571 current_override = thread->iotier_override;
2572
2573 if (current_override != THROTTLE_LEVEL_NONE) {
2574 policy = MIN(current_override, policy);
2575 }
2576
2577 if (current_override == policy) {
2578 /* no effective change */
2579 return;
2580 }
2581 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2582
2583 /*
2584 * Since the thread may be currently throttled,
2585 * re-evaluate tiers and potentially break out
2586 * of an msleep
2587 */
2588 rethrottle_thread(get_bsdthread_info(thread));
2589 }
2590
2591 /*
2592 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2593 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2594 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2595 * priority thread. In these cases, we attempt to propagate the priority token, as long
2596 * as the subsystem informs us of the relationships between the threads. The userspace
2597 * synchronization subsystem should maintain the information of owner->resource and
2598 * resource->waiters itself.
2599 */
2600
2601 /*
2602 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2603 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2604 * to be handled specially in the future, but for now it's fine to slam
2605 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2606 */
2607 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2608 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2609 {
2610 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2611 /* Map all input resource/type to a single one */
2612 *resource = USER_ADDR_NULL;
2613 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2614 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2615 /* no transform */
2616 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2617 /* Map all mutex overrides to a single one, to avoid memory overhead */
2618 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2619 *resource = USER_ADDR_NULL;
2620 }
2621 }
2622 }
2623
2624 /* This helper routine finds an existing override if known. Locking should be done by caller */
2625 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2626 find_qos_override(thread_t thread,
2627 user_addr_t resource,
2628 int resource_type)
2629 {
2630 struct thread_qos_override *override;
2631
2632 override = thread->overrides;
2633 while (override) {
2634 if (override->override_resource == resource &&
2635 override->override_resource_type == resource_type) {
2636 return override;
2637 }
2638
2639 override = override->override_next;
2640 }
2641
2642 return NULL;
2643 }
2644
2645 static void
find_and_decrement_qos_override(thread_t thread,user_addr_t resource,int resource_type,boolean_t reset,struct thread_qos_override ** free_override_list)2646 find_and_decrement_qos_override(thread_t thread,
2647 user_addr_t resource,
2648 int resource_type,
2649 boolean_t reset,
2650 struct thread_qos_override **free_override_list)
2651 {
2652 struct thread_qos_override *override, *override_prev;
2653
2654 override_prev = NULL;
2655 override = thread->overrides;
2656 while (override) {
2657 struct thread_qos_override *override_next = override->override_next;
2658
2659 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2660 (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2661 if (reset) {
2662 override->override_contended_resource_count = 0;
2663 } else {
2664 override->override_contended_resource_count--;
2665 }
2666
2667 if (override->override_contended_resource_count == 0) {
2668 if (override_prev == NULL) {
2669 thread->overrides = override_next;
2670 } else {
2671 override_prev->override_next = override_next;
2672 }
2673
2674 /* Add to out-param for later zfree */
2675 override->override_next = *free_override_list;
2676 *free_override_list = override;
2677 } else {
2678 override_prev = override;
2679 }
2680
2681 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2682 return;
2683 }
2684 } else {
2685 override_prev = override;
2686 }
2687
2688 override = override_next;
2689 }
2690 }
2691
2692 /* This helper recalculates the current requested override using the policy selected at boot */
2693 static int
calculate_requested_qos_override(thread_t thread)2694 calculate_requested_qos_override(thread_t thread)
2695 {
2696 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2697 return THREAD_QOS_UNSPECIFIED;
2698 }
2699
2700 /* iterate over all overrides and calculate MAX */
2701 struct thread_qos_override *override;
2702 int qos_override = THREAD_QOS_UNSPECIFIED;
2703
2704 override = thread->overrides;
2705 while (override) {
2706 qos_override = MAX(qos_override, override->override_qos);
2707 override = override->override_next;
2708 }
2709
2710 return qos_override;
2711 }
2712
2713 /*
2714 * Returns:
2715 * - 0 on success
2716 * - EINVAL if some invalid input was passed
2717 */
2718 static int
proc_thread_qos_add_override_internal(thread_t thread,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2719 proc_thread_qos_add_override_internal(thread_t thread,
2720 int override_qos,
2721 boolean_t first_override_for_resource,
2722 user_addr_t resource,
2723 int resource_type)
2724 {
2725 struct task_pend_token pend_token = {};
2726 int rc = 0;
2727
2728 thread_mtx_lock(thread);
2729
2730 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2731 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2732
2733 DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2734 uint64_t, thread->requested_policy.thrp_qos,
2735 uint64_t, thread->effective_policy.thep_qos,
2736 int, override_qos, boolean_t, first_override_for_resource);
2737
2738 struct thread_qos_override *override;
2739 struct thread_qos_override *override_new = NULL;
2740 int new_qos_override, prev_qos_override;
2741 int new_effective_qos;
2742
2743 canonicalize_resource_and_type(&resource, &resource_type);
2744
2745 override = find_qos_override(thread, resource, resource_type);
2746 if (first_override_for_resource && !override) {
2747 /* We need to allocate a new object. Drop the thread lock and
2748 * recheck afterwards in case someone else added the override
2749 */
2750 thread_mtx_unlock(thread);
2751 override_new = zalloc(thread_qos_override_zone);
2752 thread_mtx_lock(thread);
2753 override = find_qos_override(thread, resource, resource_type);
2754 }
2755 if (first_override_for_resource && override) {
2756 /* Someone else already allocated while the thread lock was dropped */
2757 override->override_contended_resource_count++;
2758 } else if (!override && override_new) {
2759 override = override_new;
2760 override_new = NULL;
2761 override->override_next = thread->overrides;
2762 /* since first_override_for_resource was TRUE */
2763 override->override_contended_resource_count = 1;
2764 override->override_resource = resource;
2765 override->override_resource_type = (int16_t)resource_type;
2766 override->override_qos = THREAD_QOS_UNSPECIFIED;
2767 thread->overrides = override;
2768 }
2769
2770 if (override) {
2771 if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2772 override->override_qos = (int16_t)override_qos;
2773 } else {
2774 override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
2775 }
2776 }
2777
2778 /* Determine how to combine the various overrides into a single current
2779 * requested override
2780 */
2781 new_qos_override = calculate_requested_qos_override(thread);
2782
2783 prev_qos_override = proc_get_thread_policy_locked(thread,
2784 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2785
2786 if (new_qos_override != prev_qos_override) {
2787 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2788 TASK_POLICY_QOS_OVERRIDE,
2789 new_qos_override, 0, &pend_token);
2790 }
2791
2792 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2793
2794 thread_mtx_unlock(thread);
2795
2796 thread_policy_update_complete_unlocked(thread, &pend_token);
2797
2798 if (override_new) {
2799 zfree(thread_qos_override_zone, override_new);
2800 }
2801
2802 DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2803 int, new_qos_override, int, new_effective_qos, int, rc);
2804
2805 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2806 new_qos_override, resource, resource_type, 0, 0);
2807
2808 return rc;
2809 }
2810
2811 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2812 proc_thread_qos_add_override(task_t task,
2813 thread_t thread,
2814 uint64_t tid,
2815 int override_qos,
2816 boolean_t first_override_for_resource,
2817 user_addr_t resource,
2818 int resource_type)
2819 {
2820 boolean_t has_thread_reference = FALSE;
2821 int rc = 0;
2822
2823 if (thread == THREAD_NULL) {
2824 thread = task_findtid(task, tid);
2825 /* returns referenced thread */
2826
2827 if (thread == THREAD_NULL) {
2828 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2829 tid, 0, 0xdead, 0, 0);
2830 return ESRCH;
2831 }
2832 has_thread_reference = TRUE;
2833 } else {
2834 assert(get_threadtask(thread) == task);
2835 }
2836 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2837 first_override_for_resource, resource, resource_type);
2838 if (has_thread_reference) {
2839 thread_deallocate(thread);
2840 }
2841
2842 return rc;
2843 }
2844
/*
 * Remove overrides for (resource, resource_type) on 'thread' and re-apply
 * the combined requested override QoS.
 *
 * If 'reset' is TRUE, all matching override objects are removed outright;
 * otherwise one reference is dropped from the matching override
 * (see find_and_decrement_qos_override).
 *
 * Lock ordering: the override chain is protected by the thread mutex, the
 * rest of thread policy by the thread spinlock at splsched; both are taken
 * here, and unlinked override objects are zfree'd only after all locks drop.
 */
static void
proc_thread_qos_remove_override_internal(thread_t thread,
    user_addr_t resource,
    int resource_type,
    boolean_t reset)
{
	struct task_pend_token pend_token = {};

	/* Override objects unlinked below; freed after the locks are released */
	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	/* Propagate any pending policy side effects now that no locks are held */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Free the override objects unlinked above, outside of all locks */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2908
2909 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2910 proc_thread_qos_remove_override(task_t task,
2911 thread_t thread,
2912 uint64_t tid,
2913 user_addr_t resource,
2914 int resource_type)
2915 {
2916 boolean_t has_thread_reference = FALSE;
2917
2918 if (thread == THREAD_NULL) {
2919 thread = task_findtid(task, tid);
2920 /* returns referenced thread */
2921
2922 if (thread == THREAD_NULL) {
2923 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2924 tid, 0, 0xdead, 0, 0);
2925 return ESRCH;
2926 }
2927 has_thread_reference = TRUE;
2928 } else {
2929 assert(task == get_threadtask(thread));
2930 }
2931
2932 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2933
2934 if (has_thread_reference) {
2935 thread_deallocate(thread);
2936 }
2937
2938 return 0;
2939 }
2940
2941 /* Deallocate before thread termination */
2942 void
proc_thread_qos_deallocate(thread_t thread)2943 proc_thread_qos_deallocate(thread_t thread)
2944 {
2945 /* This thread must have no more IPC overrides. */
2946 assert(thread->kevent_overrides == 0);
2947 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2948 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2949
2950 /*
2951 * Clear out any lingering override objects.
2952 */
2953 struct thread_qos_override *override;
2954
2955 thread_mtx_lock(thread);
2956 override = thread->overrides;
2957 thread->overrides = NULL;
2958 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2959 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2960 thread_mtx_unlock(thread);
2961
2962 while (override) {
2963 struct thread_qos_override *override_next = override->override_next;
2964
2965 zfree(thread_qos_override_zone, override);
2966 override = override_next;
2967 }
2968 }
2969
2970 /*
2971 * Set up the primordial thread's QoS
2972 */
2973 void
task_set_main_thread_qos(task_t task,thread_t thread)2974 task_set_main_thread_qos(task_t task, thread_t thread)
2975 {
2976 struct task_pend_token pend_token = {};
2977
2978 assert(get_threadtask(thread) == task);
2979
2980 thread_mtx_lock(thread);
2981
2982 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2983 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2984 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2985 thread->requested_policy.thrp_qos, 0);
2986
2987 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2988
2989 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2990 primordial_qos, 0, &pend_token);
2991
2992 thread_mtx_unlock(thread);
2993
2994 thread_policy_update_complete_unlocked(thread, &pend_token);
2995
2996 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2997 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2998 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2999 primordial_qos, 0);
3000 }
3001
3002 /*
3003 * KPI for pthread kext
3004 *
3005 * Return a good guess at what the initial manager QoS will be
3006 * Dispatch can override this in userspace if it so chooses
3007 */
3008 thread_qos_t
task_get_default_manager_qos(task_t task)3009 task_get_default_manager_qos(task_t task)
3010 {
3011 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
3012
3013 if (primordial_qos == THREAD_QOS_LEGACY) {
3014 primordial_qos = THREAD_QOS_USER_INITIATED;
3015 }
3016
3017 return primordial_qos;
3018 }
3019
3020 /*
3021 * Check if the kernel promotion on thread has changed
3022 * and apply it.
3023 *
3024 * thread locked on entry and exit
3025 */
3026 boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)3027 thread_recompute_kernel_promotion_locked(thread_t thread)
3028 {
3029 boolean_t needs_update = FALSE;
3030 uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
3031
3032 /*
3033 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
3034 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
3035 * and propagates the priority through the chain with the same cap, because as of now it does
3036 * not differenciate on the kernel primitive.
3037 *
3038 * If this assumption will change with the adoption of a kernel primitive that does not
3039 * cap the when adding/propagating,
3040 * then here is the place to put the generic cap for all kernel primitives
3041 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
3042 */
3043 assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
3044
3045 if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
3046 KDBG(MACHDBG_CODE(
3047 DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
3048 thread_tid(thread),
3049 kern_promotion_schedpri,
3050 thread->kern_promotion_schedpri);
3051
3052 needs_update = TRUE;
3053 thread->kern_promotion_schedpri = kern_promotion_schedpri;
3054 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
3055 }
3056
3057 return needs_update;
3058 }
3059
3060 /*
3061 * Check if the user promotion on thread has changed
3062 * and apply it.
3063 *
3064 * thread locked on entry, might drop the thread lock
3065 * and reacquire it.
3066 */
3067 boolean_t
thread_recompute_user_promotion_locked(thread_t thread)3068 thread_recompute_user_promotion_locked(thread_t thread)
3069 {
3070 boolean_t needs_update = FALSE;
3071 struct task_pend_token pend_token = {};
3072 uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
3073 int old_base_pri = thread->base_pri;
3074 thread_qos_t qos_promotion;
3075
3076 /* Check if user promotion has changed */
3077 if (thread->user_promotion_basepri == user_promotion_basepri) {
3078 return needs_update;
3079 } else {
3080 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3081 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
3082 thread_tid(thread),
3083 user_promotion_basepri,
3084 thread->user_promotion_basepri,
3085 0, 0);
3086 KDBG(MACHDBG_CODE(
3087 DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
3088 thread_tid(thread),
3089 user_promotion_basepri,
3090 thread->user_promotion_basepri);
3091 }
3092
3093 /* Update the user promotion base pri */
3094 thread->user_promotion_basepri = user_promotion_basepri;
3095 pend_token.tpt_force_recompute_pri = 1;
3096
3097 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
3098 qos_promotion = THREAD_QOS_UNSPECIFIED;
3099 } else {
3100 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
3101 }
3102
3103 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3104 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
3105
3106 if (thread_get_waiting_turnstile(thread) &&
3107 thread->base_pri != old_base_pri) {
3108 needs_update = TRUE;
3109 }
3110
3111 thread_unlock(thread);
3112
3113 thread_policy_update_complete_unlocked(thread, &pend_token);
3114
3115 thread_lock(thread);
3116
3117 return needs_update;
3118 }
3119
3120 /*
3121 * Convert the thread user promotion base pri to qos for threads in qos world.
3122 * For priority above UI qos, the qos would be set to UI.
3123 */
3124 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)3125 thread_user_promotion_qos_for_pri(int priority)
3126 {
3127 thread_qos_t qos;
3128 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3129 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3130 return qos;
3131 }
3132 }
3133 return THREAD_QOS_MAINTENANCE;
3134 }
3135
3136 /*
3137 * Set the thread's QoS Kevent override
3138 * Owned by the Kevent subsystem
3139 *
3140 * May be called with spinlocks held, but not spinlocks
3141 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3142 *
3143 * One 'add' must be balanced by one 'drop'.
3144 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3145 * Before the thread is deallocated, there must be 0 remaining overrides.
3146 */
3147 static void
thread_kevent_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3148 thread_kevent_override(thread_t thread,
3149 uint32_t qos_override,
3150 boolean_t is_new_override)
3151 {
3152 struct task_pend_token pend_token = {};
3153 boolean_t needs_update;
3154
3155 spl_t s = splsched();
3156 thread_lock(thread);
3157
3158 uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
3159
3160 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3161 assert(qos_override < THREAD_QOS_LAST);
3162
3163 if (is_new_override) {
3164 if (thread->kevent_overrides++ == 0) {
3165 /* This add is the first override for this thread */
3166 assert(old_override == THREAD_QOS_UNSPECIFIED);
3167 } else {
3168 /* There are already other overrides in effect for this thread */
3169 assert(old_override > THREAD_QOS_UNSPECIFIED);
3170 }
3171 } else {
3172 /* There must be at least one override (the previous add call) in effect */
3173 assert(thread->kevent_overrides > 0);
3174 assert(old_override > THREAD_QOS_UNSPECIFIED);
3175 }
3176
3177 /*
3178 * We can't allow lowering if there are several IPC overrides because
3179 * the caller can't possibly know the whole truth
3180 */
3181 if (thread->kevent_overrides == 1) {
3182 needs_update = qos_override != old_override;
3183 } else {
3184 needs_update = qos_override > old_override;
3185 }
3186
3187 if (needs_update) {
3188 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3189 TASK_POLICY_QOS_KEVENT_OVERRIDE,
3190 qos_override, 0, &pend_token);
3191 assert(pend_token.tpt_update_sockets == 0);
3192 }
3193
3194 thread_unlock(thread);
3195 splx(s);
3196
3197 thread_policy_update_complete_unlocked(thread, &pend_token);
3198 }
3199
/* Add a kevent QoS override; must be balanced by thread_drop_kevent_override() */
void
thread_add_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, TRUE);
}
3205
/* Update the QoS value of an existing kevent override (between add and drop) */
void
thread_update_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, FALSE);
}
3211
/*
 * Drop one kevent QoS override previously added by thread_add_kevent_override().
 * When the last override is dropped, the saturated override value is cleared.
 */
void
thread_drop_kevent_override(thread_t thread)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	assert(thread->kevent_overrides > 0);

	if (--thread->kevent_overrides == 0) {
		/*
		 * There are no more overrides for this thread, so we should
		 * clear out the saturated override value
		 */

		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
		    0, &pend_token);
	}

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3238
3239 /*
3240 * Set the thread's QoS Workloop Servicer override
3241 * Owned by the Kevent subsystem
3242 *
3243 * May be called with spinlocks held, but not spinlocks
3244 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3245 *
3246 * One 'add' must be balanced by one 'drop'.
3247 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3248 * Before the thread is deallocated, there must be 0 remaining overrides.
3249 */
3250 static void
thread_servicer_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3251 thread_servicer_override(thread_t thread,
3252 uint32_t qos_override,
3253 boolean_t is_new_override)
3254 {
3255 struct task_pend_token pend_token = {};
3256
3257 spl_t s = splsched();
3258 thread_lock(thread);
3259
3260 if (is_new_override) {
3261 assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3262 } else {
3263 assert(thread->requested_policy.thrp_qos_wlsvc_override);
3264 }
3265
3266 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3267 TASK_POLICY_QOS_SERVICER_OVERRIDE,
3268 qos_override, 0, &pend_token);
3269
3270 thread_unlock(thread);
3271 splx(s);
3272
3273 assert(pend_token.tpt_update_sockets == 0);
3274 thread_policy_update_complete_unlocked(thread, &pend_token);
3275 }
3276
/* Add a workloop servicer override; must be balanced by thread_drop_servicer_override() */
void
thread_add_servicer_override(thread_t thread, uint32_t qos_override)
{
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, TRUE);
}
3285
/* Change the QoS value of an existing servicer override (between add and drop) */
void
thread_update_servicer_override(thread_t thread, uint32_t qos_override)
{
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, FALSE);
}
3294
/* Remove the servicer override by resetting it to THREAD_QOS_UNSPECIFIED */
void
thread_drop_servicer_override(thread_t thread)
{
	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
}
3300
/*
 * Update the thread's kevent I/O tier override.
 *
 * NOTE(review): the current value is read via
 * thread_get_requested_policy_spinlocked() before the thread lock is taken
 * below, so the early-return check is racy against concurrent setters;
 * presumably the kevent subsystem serializes updates for a given thread —
 * confirm before relying on this pattern elsewhere.
 */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/* Check if the update is needed */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* An I/O tier override change must never require socket updates */
	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3328
3329 /* Get current requested qos / relpri, may be called from spinlock context */
3330 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3331 thread_get_requested_qos(thread_t thread, int *relpri)
3332 {
3333 int relprio_value = 0;
3334 thread_qos_t qos;
3335
3336 qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3337 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3338 if (relpri) {
3339 *relpri = -relprio_value;
3340 }
3341 return qos;
3342 }
3343
3344 /*
3345 * This function will promote the thread priority
3346 * since exec could block other threads calling
3347 * proc_find on the proc. This boost must be removed
3348 * via call to thread_clear_exec_promotion.
3349 *
3350 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3351 */
3352 void
thread_set_exec_promotion(thread_t thread)3353 thread_set_exec_promotion(thread_t thread)
3354 {
3355 spl_t s = splsched();
3356 thread_lock(thread);
3357
3358 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3359
3360 thread_unlock(thread);
3361 splx(s);
3362 }
3363
3364 /*
3365 * This function will clear the exec thread
3366 * promotion set on the thread by thread_set_exec_promotion.
3367 */
3368 void
thread_clear_exec_promotion(thread_t thread)3369 thread_clear_exec_promotion(thread_t thread)
3370 {
3371 spl_t s = splsched();
3372 thread_lock(thread);
3373
3374 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3375
3376 thread_unlock(thread);
3377 splx(s);
3378 }
3379
3380 #if CONFIG_SCHED_RT_ALLOW
3381
3382 /*
3383 * flag set by -rt-allow-policy-enable boot-arg to restrict use of
3384 * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
3385 * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
3386 */
3387 static TUNABLE(
3388 bool,
3389 rt_allow_policy_enabled,
3390 "-rt-allow_policy-enable",
3391 #if XNU_TARGET_OS_XR
3392 true
3393 #else
3394 false
3395 #endif /* XNU_TARGET_OS_XR */
3396 );
3397
3398 /*
3399 * When the RT allow policy is enabled and a thread allowed to become RT,
3400 * sometimes (if the processes RT allow policy is restricted) the thread will
3401 * have a CPU limit enforced. The following two tunables determine the
3402 * parameters for that CPU limit.
3403 */
3404
3405 /* % of the interval allowed to run. */
3406 TUNABLE_DEV_WRITEABLE(uint8_t, rt_allow_limit_percent,
3407 "rt_allow_limit_percent", 70);
3408
3409 /* The length of interval in nanoseconds. */
3410 TUNABLE_DEV_WRITEABLE(uint16_t, rt_allow_limit_interval_ms,
3411 "rt_allow_limit_interval", 10);
3412
3413 static bool
thread_has_rt(thread_t thread)3414 thread_has_rt(thread_t thread)
3415 {
3416 return
3417 thread->sched_mode == TH_MODE_REALTIME ||
3418 thread->saved_mode == TH_MODE_REALTIME;
3419 }
3420
3421 /*
3422 * Set a CPU limit on a thread based on the RT allow policy. This will be picked
3423 * up by the target thread via the ledger AST.
3424 */
3425 static void
thread_rt_set_cpulimit(thread_t thread)3426 thread_rt_set_cpulimit(thread_t thread)
3427 {
3428 /* Force reasonable values for the cpu limit. */
3429 const uint8_t percent = MAX(MIN(rt_allow_limit_percent, 99), 1);
3430 const uint16_t interval_ms = MAX(rt_allow_limit_interval_ms, 1);
3431
3432 thread->t_ledger_req_percentage = percent;
3433 thread->t_ledger_req_interval_ms = interval_ms;
3434 thread->t_ledger_req_action = THREAD_CPULIMIT_BLOCK;
3435
3436 thread->sched_flags |= TH_SFLAG_RT_CPULIMIT;
3437 }
3438
/* Similar to the above but removes any CPU limit. */
static void
thread_rt_clear_cpulimit(thread_t thread)
{
	thread->sched_flags &= ~TH_SFLAG_RT_CPULIMIT;

	/* Reset the ledger request so the ledger AST disables the limit */
	thread->t_ledger_req_percentage = 0;
	thread->t_ledger_req_interval_ms = 0;
	thread->t_ledger_req_action = THREAD_CPULIMIT_DISABLE;
}
3449
3450 /*
3451 * Evaluate RT policy for a thread, demoting and undemoting as needed.
3452 */
3453 void
thread_rt_evaluate(thread_t thread)3454 thread_rt_evaluate(thread_t thread)
3455 {
3456 task_t task = get_threadtask(thread);
3457 bool platform_binary = false;
3458
3459 /* If the RT allow policy is not enabled - nothing to do. */
3460 if (!rt_allow_policy_enabled) {
3461 return;
3462 }
3463
3464 /* User threads only. */
3465 if (task == kernel_task) {
3466 return;
3467 }
3468
3469 /* Check for platform binary. */
3470 platform_binary = (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
3471
3472 spl_t s = splsched();
3473 thread_lock(thread);
3474
3475 const thread_work_interval_flags_t wi_flags =
3476 os_atomic_load(&thread->th_work_interval_flags, relaxed);
3477
3478 /*
3479 * RT threads which are not joined to a work interval which allows RT
3480 * threads are demoted. Once those conditions no longer hold, the thread
3481 * undemoted.
3482 */
3483 if (thread_has_rt(thread) && (wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) {
3484 if (!sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3485 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_DISALLOWED_WORK_INTERVAL),
3486 thread_tid(thread));
3487 sched_thread_mode_demote(thread, TH_SFLAG_RT_DISALLOWED);
3488 }
3489 } else {
3490 if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3491 sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
3492 }
3493 }
3494
3495 /*
3496 * RT threads get a CPU limit unless they're part of a platform binary
3497 * task. If the thread is no longer RT, any existing CPU limit should be
3498 * removed.
3499 */
3500 bool set_ast = false;
3501 if (!platform_binary &&
3502 thread_has_rt(thread) &&
3503 (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) == 0) {
3504 thread_rt_set_cpulimit(thread);
3505 set_ast = true;
3506 }
3507
3508 if (!platform_binary &&
3509 !thread_has_rt(thread) &&
3510 (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) != 0) {
3511 thread_rt_clear_cpulimit(thread);
3512 set_ast = true;
3513 }
3514
3515 thread_unlock(thread);
3516 splx(s);
3517
3518 if (set_ast) {
3519 /* Ensure the target thread picks up any CPU limit change. */
3520 act_set_astledger(thread);
3521 }
3522 }
3523
3524 #else
3525
/* CONFIG_SCHED_RT_ALLOW is compiled out: the RT allow policy does not apply */
void
thread_rt_evaluate(__unused thread_t thread)
{
}
3530
3531 #endif /* CONFIG_SCHED_RT_ALLOW */
3532