xref: /xnu-8796.101.5/osfmk/kperf/kperfbsd.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*  sysctl interface for parameters from user-land */
30 
31 #include <kern/debug.h>
32 #include <libkern/libkern.h>
33 #include <pexpert/pexpert.h>
34 #include <sys/param.h>
35 #include <sys/mman.h>
36 #include <sys/stat.h>
37 #include <sys/sysctl.h>
38 #include <sys/kauth.h>
39 
40 #include <kperf/action.h>
41 #include <kperf/context.h>
42 #include <kperf/kdebug_trigger.h>
43 #include <kperf/kperf.h>
44 #include <kperf/kperfbsd.h>
45 #include <kperf/kptimer.h>
46 #include <kperf/pet.h>
47 #include <kperf/lazy.h>
48 
49 #include <sys/ktrace.h>
50 
/*
 * Requests from kperf sysctls.  Each SYSCTL_PROC below passes one of these
 * values through arg1 so kperf_sysctl can dispatch to the right sub-handler.
 */
enum kperf_request {
	REQ_SAMPLING,   /* start or stop sampling */
	REQ_RESET,      /* tear down all kperf state via ktrace_reset */

	/* per-action configuration */
	REQ_ACTION_COUNT,
	REQ_ACTION_SAMPLERS,
	REQ_ACTION_USERDATA,
	REQ_ACTION_FILTER_BY_TASK,
	REQ_ACTION_FILTER_BY_PID,
	REQ_ACTION_UCALLSTACK_DEPTH,
	REQ_ACTION_KCALLSTACK_DEPTH,

	/* timer triggers */
	REQ_TIMER_COUNT,
	REQ_TIMER_PERIOD,
	REQ_TIMER_PET,
	REQ_TIMER_ACTION,

	REQ_KDBG_CSWITCH,

	/* ktrace ownership ("blessing"); REQ_BLESS is serviced by
	 * kperf_sysctl_bless_handler rather than kperf_sysctl */
	REQ_BLESS,
	REQ_BLESS_PREEMPT,

	/* profile-every-thread (PET) tuning */
	REQ_PET_IDLE_RATE,
	REQ_LIGHTWEIGHT_PET,

	/* kdebug-triggered sampling */
	REQ_KDEBUG_FILTER,
	REQ_KDEBUG_ACTION,

	/* lazy (threshold-based) sampling */
	REQ_LAZY_WAIT_TIME_THRESHOLD,
	REQ_LAZY_WAIT_ACTION,
	REQ_LAZY_CPU_TIME_THRESHOLD,
	REQ_LAZY_CPU_ACTION,
};
85 
/* Verbosity knob, exposed read-write as the `kperf.debug_level` sysctl. */
int kperf_debug_level = 0;

#if DEVELOPMENT || DEBUG
/* Exposed read-only as `kperf.already_pending_ipis` on dev/debug kernels. */
_Atomic long long kperf_pending_ipis = 0;
#endif /* DEVELOPMENT || DEBUG */
91 
92 /*
93  * kperf has unique requirements from sysctl.
94  *
95  * For simple queries like the number of actions, the normal sysctl style
96  * of get/set works well.
97  *
98  * However, when requesting information about something specific, like an
99  * action, user space needs to provide some contextual information.  This
100  * information is stored in a uint64_t array that includes the context, like
101  * the action ID it is interested in.  If user space is getting the value from
102  * the kernel, then the get side of the sysctl is valid.  If it is setting the
103  * value, then the get pointers are left NULL.
104  *
105  * These functions handle marshalling and unmarshalling data from sysctls.
106  */
107 
108 static int
kperf_sysctl_get_set_uint32(struct sysctl_req * req,uint32_t (* get)(void),int (* set)(uint32_t))109 kperf_sysctl_get_set_uint32(struct sysctl_req *req,
110     uint32_t (*get)(void), int (*set)(uint32_t))
111 {
112 	assert(req != NULL);
113 	assert(get != NULL);
114 	assert(set != NULL);
115 
116 	uint32_t value = 0;
117 	if (req->oldptr) {
118 		value = get();
119 	}
120 
121 	int error = sysctl_io_number(req, value, sizeof(value), &value, NULL);
122 
123 	if (error || !req->newptr) {
124 		return error;
125 	}
126 
127 	return set(value);
128 }
129 
130 static int
kperf_sysctl_get_set_int(struct sysctl_req * req,int (* get)(void),int (* set)(int))131 kperf_sysctl_get_set_int(struct sysctl_req *req,
132     int (*get)(void), int (*set)(int))
133 {
134 	assert(req != NULL);
135 	assert(get != NULL);
136 	assert(set != NULL);
137 
138 	int value = 0;
139 	if (req->oldptr) {
140 		value = get();
141 	}
142 
143 	int error = sysctl_io_number(req, value, sizeof(value), &value, NULL);
144 
145 	if (error || !req->newptr) {
146 		return error;
147 	}
148 
149 	return set(value);
150 }
151 
152 static int
kperf_sysctl_get_set_uint64(struct sysctl_req * req,uint64_t (* get)(void),int (* set)(uint64_t))153 kperf_sysctl_get_set_uint64(struct sysctl_req *req,
154     uint64_t (*get)(void), int (*set)(uint64_t))
155 {
156 	assert(req != NULL);
157 	assert(get != NULL);
158 	assert(set != NULL);
159 
160 	uint64_t value = 0;
161 	if (req->oldptr) {
162 		value = get();
163 	}
164 
165 	int error = sysctl_io_number(req, (long long)value, sizeof(value), &value, NULL);
166 
167 	if (error || !req->newptr) {
168 		return error;
169 	}
170 
171 	return set(value);
172 }
173 
174 static int
kperf_sysctl_get_set_unsigned_uint32(struct sysctl_req * req,int (* get)(unsigned int,uint32_t *),int (* set)(unsigned int,uint32_t))175 kperf_sysctl_get_set_unsigned_uint32(struct sysctl_req *req,
176     int (*get)(unsigned int, uint32_t *), int (*set)(unsigned int, uint32_t))
177 {
178 	assert(req != NULL);
179 	assert(get != NULL);
180 	assert(set != NULL);
181 
182 	int error = 0;
183 	uint64_t inputs[2] = {};
184 
185 	if (req->newptr == USER_ADDR_NULL) {
186 		return EFAULT;
187 	}
188 
189 	if ((error = copyin(req->newptr, inputs, sizeof(inputs)))) {
190 		return error;
191 	}
192 
193 	unsigned int action_id = (unsigned int)inputs[0];
194 	uint32_t new_value = (uint32_t)inputs[1];
195 
196 	if (req->oldptr != USER_ADDR_NULL) {
197 		uint32_t value_out = 0;
198 		if ((error = get(action_id, &value_out))) {
199 			return error;
200 		}
201 
202 		inputs[1] = value_out;
203 
204 		return copyout(inputs, req->oldptr, sizeof(inputs));
205 	} else {
206 		return set(action_id, new_value);
207 	}
208 }
209 
210 /*
211  * These functions are essentially the same as the generic
212  * kperf_sysctl_get_set_unsigned_uint32, except they have unique input sizes.
213  */
214 
215 static int
sysctl_timer_period(struct sysctl_req * req)216 sysctl_timer_period(struct sysctl_req *req)
217 {
218 	uint64_t inputs[2] = {};
219 
220 	if (req->newptr == USER_ADDR_NULL) {
221 		return EFAULT;
222 	}
223 
224 	int error = 0;
225 	if ((error = copyin(req->newptr, inputs, sizeof(inputs)))) {
226 		return error;
227 	}
228 	unsigned int timer = (unsigned int)inputs[0];
229 	uint64_t new_period = inputs[1];
230 
231 	if (req->oldptr != USER_ADDR_NULL) {
232 		uint64_t period_out = 0;
233 		if ((error = kptimer_get_period(timer, &period_out))) {
234 			return error;
235 		}
236 
237 		inputs[1] = period_out;
238 		return copyout(inputs, req->oldptr, sizeof(inputs));
239 	} else {
240 		return kptimer_set_period(timer, new_period);
241 	}
242 }
243 
244 static int
sysctl_action_filter(struct sysctl_req * req,bool is_task_t)245 sysctl_action_filter(struct sysctl_req *req, bool is_task_t)
246 {
247 	int error = 0;
248 	uint64_t inputs[2] = {};
249 
250 	assert(req != NULL);
251 
252 	if (req->newptr == USER_ADDR_NULL) {
253 		return EFAULT;
254 	}
255 
256 	if ((error = copyin(req->newptr, inputs, sizeof(inputs)))) {
257 		return error;
258 	}
259 
260 	unsigned int actionid = (unsigned int)inputs[0];
261 	int new_filter = (int)inputs[1];
262 
263 	if (req->oldptr != USER_ADDR_NULL) {
264 		int filter_out;
265 		if ((error = kperf_action_get_filter(actionid, &filter_out))) {
266 			return error;
267 		}
268 
269 		inputs[1] = (uint64_t)filter_out;
270 		return copyout(inputs, req->oldptr, sizeof(inputs));
271 	} else {
272 		int pid = is_task_t ? kperf_port_to_pid((mach_port_name_t)new_filter)
273 		    : new_filter;
274 
275 		return kperf_action_set_filter(actionid, pid);
276 	}
277 }
278 
279 static int
sysctl_bless(struct sysctl_req * req)280 sysctl_bless(struct sysctl_req *req)
281 {
282 	int value = ktrace_get_owning_pid();
283 	int error = sysctl_io_number(req, value, sizeof(value), &value, NULL);
284 
285 	if (error || !req->newptr) {
286 		return error;
287 	}
288 
289 	return ktrace_set_owning_pid(value);
290 }
291 
292 /* sysctl handlers that use the generic functions */
293 
294 static int
sysctl_action_samplers(struct sysctl_req * req)295 sysctl_action_samplers(struct sysctl_req *req)
296 {
297 	return kperf_sysctl_get_set_unsigned_uint32(req,
298 	           kperf_action_get_samplers, kperf_action_set_samplers);
299 }
300 
301 static int
sysctl_action_userdata(struct sysctl_req * req)302 sysctl_action_userdata(struct sysctl_req *req)
303 {
304 	return kperf_sysctl_get_set_unsigned_uint32(req,
305 	           kperf_action_get_userdata, kperf_action_set_userdata);
306 }
307 
308 static int
sysctl_action_ucallstack_depth(struct sysctl_req * req)309 sysctl_action_ucallstack_depth(struct sysctl_req *req)
310 {
311 	return kperf_sysctl_get_set_unsigned_uint32(req,
312 	           kperf_action_get_ucallstack_depth, kperf_action_set_ucallstack_depth);
313 }
314 
315 static int
sysctl_action_kcallstack_depth(struct sysctl_req * req)316 sysctl_action_kcallstack_depth(struct sysctl_req *req)
317 {
318 	return kperf_sysctl_get_set_unsigned_uint32(req,
319 	           kperf_action_get_kcallstack_depth, kperf_action_set_kcallstack_depth);
320 }
321 
322 static int
sysctl_kdebug_action(struct sysctl_req * req)323 sysctl_kdebug_action(struct sysctl_req *req)
324 {
325 	return kperf_sysctl_get_set_int(req, kperf_kdebug_get_action,
326 	           kperf_kdebug_set_action);
327 }
328 
329 static int
sysctl_kdebug_filter(struct sysctl_req * req)330 sysctl_kdebug_filter(struct sysctl_req *req)
331 {
332 	assert(req != NULL);
333 
334 	if (req->oldptr != USER_ADDR_NULL) {
335 		struct kperf_kdebug_filter *filter = NULL;
336 		uint32_t n_debugids = kperf_kdebug_get_filter(&filter);
337 		size_t filter_size = KPERF_KDEBUG_FILTER_SIZE(n_debugids);
338 
339 		if (n_debugids == 0) {
340 			return EINVAL;
341 		}
342 
343 		return SYSCTL_OUT(req, filter, filter_size);
344 	} else if (req->newptr != USER_ADDR_NULL) {
345 		return kperf_kdebug_set_filter(req->newptr, (uint32_t)req->newlen);
346 	} else {
347 		return EINVAL;
348 	}
349 }
350 
/* Getter shim adapting kperf_is_sampling to the uint32_t sysctl helper. */
static uint32_t
kperf_sampling_get(void)
{
	uint32_t on = kperf_is_sampling();
	return on;
}
356 
/* Setter shim: any non-zero value starts sampling, zero stops it. */
static int
kperf_sampling_set(uint32_t sample_start)
{
	return sample_start ? kperf_enable_sampling()
	    : kperf_disable_sampling();
}
366 
367 static int
sysctl_sampling(struct sysctl_req * req)368 sysctl_sampling(struct sysctl_req *req)
369 {
370 	return kperf_sysctl_get_set_uint32(req, kperf_sampling_get,
371 	           kperf_sampling_set);
372 }
373 
374 static int
sysctl_action_count(struct sysctl_req * req)375 sysctl_action_count(struct sysctl_req *req)
376 {
377 	return kperf_sysctl_get_set_uint32(req, kperf_action_get_count,
378 	           kperf_action_set_count);
379 }
380 
381 static int
sysctl_timer_count(struct sysctl_req * req)382 sysctl_timer_count(struct sysctl_req *req)
383 {
384 	return kperf_sysctl_get_set_uint32(req, kptimer_get_count,
385 	           kptimer_set_count);
386 }
387 
388 static int
sysctl_timer_action(struct sysctl_req * req)389 sysctl_timer_action(struct sysctl_req *req)
390 {
391 	return kperf_sysctl_get_set_unsigned_uint32(req, kptimer_get_action,
392 	           kptimer_set_action);
393 }
394 
395 static int
sysctl_timer_pet(struct sysctl_req * req)396 sysctl_timer_pet(struct sysctl_req *req)
397 {
398 	return kperf_sysctl_get_set_uint32(req, kptimer_get_pet_timerid,
399 	           kptimer_set_pet_timerid);
400 }
401 
402 static int
sysctl_bless_preempt(struct sysctl_req * req)403 sysctl_bless_preempt(struct sysctl_req *req)
404 {
405 	return sysctl_io_number(req, ktrace_root_set_owner_allowed,
406 	           sizeof(ktrace_root_set_owner_allowed),
407 	           &ktrace_root_set_owner_allowed, NULL);
408 }
409 
410 static int
sysctl_kperf_reset(struct sysctl_req * req)411 sysctl_kperf_reset(struct sysctl_req *req)
412 {
413 	int should_reset = 0;
414 
415 	int error = sysctl_io_number(req, should_reset, sizeof(should_reset),
416 	    &should_reset, NULL);
417 	if (error) {
418 		return error;
419 	}
420 
421 	if (should_reset) {
422 		ktrace_reset(KTRACE_KPERF);
423 	}
424 	return 0;
425 }
426 
427 static int
sysctl_pet_idle_rate(struct sysctl_req * req)428 sysctl_pet_idle_rate(struct sysctl_req *req)
429 {
430 	return kperf_sysctl_get_set_int(req, kppet_get_idle_rate,
431 	           kppet_set_idle_rate);
432 }
433 
434 static int
sysctl_lightweight_pet(struct sysctl_req * req)435 sysctl_lightweight_pet(struct sysctl_req *req)
436 {
437 	return kperf_sysctl_get_set_int(req, kppet_get_lightweight_pet,
438 	           kppet_set_lightweight_pet);
439 }
440 
441 static int
sysctl_kdbg_cswitch(struct sysctl_req * req)442 sysctl_kdbg_cswitch(struct sysctl_req *req)
443 {
444 	return kperf_sysctl_get_set_int(req, kperf_kdbg_cswitch_get,
445 	           kperf_kdbg_cswitch_set);
446 }
447 
448 static int
sysctl_lazy_wait_time_threshold(struct sysctl_req * req)449 sysctl_lazy_wait_time_threshold(struct sysctl_req *req)
450 {
451 	return kperf_sysctl_get_set_uint64(req, kperf_lazy_get_wait_time_threshold,
452 	           kperf_lazy_set_wait_time_threshold);
453 }
454 
455 static int
sysctl_lazy_wait_action(struct sysctl_req * req)456 sysctl_lazy_wait_action(struct sysctl_req *req)
457 {
458 	return kperf_sysctl_get_set_int(req, kperf_lazy_get_wait_action,
459 	           kperf_lazy_set_wait_action);
460 }
461 
462 static int
sysctl_lazy_cpu_time_threshold(struct sysctl_req * req)463 sysctl_lazy_cpu_time_threshold(struct sysctl_req *req)
464 {
465 	return kperf_sysctl_get_set_uint64(req, kperf_lazy_get_cpu_time_threshold,
466 	           kperf_lazy_set_cpu_time_threshold);
467 }
468 
469 static int
sysctl_lazy_cpu_action(struct sysctl_req * req)470 sysctl_lazy_cpu_action(struct sysctl_req *req)
471 {
472 	return kperf_sysctl_get_set_int(req, kperf_lazy_get_cpu_action,
473 	           kperf_lazy_set_cpu_action);
474 }
475 
476 static int
477 kperf_sysctl SYSCTL_HANDLER_ARGS
478 {
479 #pragma unused(oidp, arg2)
480 	int ret;
481 	enum kperf_request type = (enum kperf_request)arg1;
482 
483 	ktrace_lock();
484 
485 	if (req->oldptr == USER_ADDR_NULL && req->newptr != USER_ADDR_NULL) {
486 		if ((ret = ktrace_configure(KTRACE_KPERF))) {
487 			ktrace_unlock();
488 			return ret;
489 		}
490 	} else {
491 		if ((ret = ktrace_read_check())) {
492 			ktrace_unlock();
493 			return ret;
494 		}
495 	}
496 
497 	/* which request */
498 	switch (type) {
499 	case REQ_ACTION_COUNT:
500 		ret = sysctl_action_count(req);
501 		break;
502 	case REQ_ACTION_SAMPLERS:
503 		ret = sysctl_action_samplers(req);
504 		break;
505 	case REQ_ACTION_USERDATA:
506 		ret = sysctl_action_userdata(req);
507 		break;
508 	case REQ_TIMER_COUNT:
509 		ret = sysctl_timer_count(req);
510 		break;
511 	case REQ_TIMER_PERIOD:
512 		ret = sysctl_timer_period(req);
513 		break;
514 	case REQ_TIMER_PET:
515 		ret = sysctl_timer_pet(req);
516 		break;
517 	case REQ_TIMER_ACTION:
518 		ret = sysctl_timer_action(req);
519 		break;
520 	case REQ_SAMPLING:
521 		ret = sysctl_sampling(req);
522 		break;
523 	case REQ_KDBG_CSWITCH:
524 		ret = sysctl_kdbg_cswitch(req);
525 		break;
526 	case REQ_ACTION_FILTER_BY_TASK:
527 		ret = sysctl_action_filter(req, true);
528 		break;
529 	case REQ_ACTION_FILTER_BY_PID:
530 		ret = sysctl_action_filter(req, false);
531 		break;
532 	case REQ_KDEBUG_ACTION:
533 		ret = sysctl_kdebug_action(req);
534 		break;
535 	case REQ_KDEBUG_FILTER:
536 		ret = sysctl_kdebug_filter(req);
537 		break;
538 	case REQ_PET_IDLE_RATE:
539 		ret = sysctl_pet_idle_rate(req);
540 		break;
541 	case REQ_BLESS_PREEMPT:
542 		ret = sysctl_bless_preempt(req);
543 		break;
544 	case REQ_RESET:
545 		ret = sysctl_kperf_reset(req);
546 		break;
547 	case REQ_ACTION_UCALLSTACK_DEPTH:
548 		ret = sysctl_action_ucallstack_depth(req);
549 		break;
550 	case REQ_ACTION_KCALLSTACK_DEPTH:
551 		ret = sysctl_action_kcallstack_depth(req);
552 		break;
553 	case REQ_LIGHTWEIGHT_PET:
554 		ret = sysctl_lightweight_pet(req);
555 		break;
556 	case REQ_LAZY_WAIT_TIME_THRESHOLD:
557 		ret = sysctl_lazy_wait_time_threshold(req);
558 		break;
559 	case REQ_LAZY_WAIT_ACTION:
560 		ret = sysctl_lazy_wait_action(req);
561 		break;
562 	case REQ_LAZY_CPU_TIME_THRESHOLD:
563 		ret = sysctl_lazy_cpu_time_threshold(req);
564 		break;
565 	case REQ_LAZY_CPU_ACTION:
566 		ret = sysctl_lazy_cpu_action(req);
567 		break;
568 	default:
569 		ret = ENOENT;
570 		break;
571 	}
572 
573 	ktrace_unlock();
574 
575 	return ret;
576 }
577 
/*
 * Handler for the `kperf.blessed_pid` sysctl, which reads or transfers
 * ktrace ownership.  Kept separate from kperf_sysctl because setting the
 * blessed pid has its own permission rules (the root-bypass below).
 */
static int
kperf_sysctl_bless_handler SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	int ret;

	ktrace_lock();

	/* if setting a new "blessed pid" (ktrace owning pid) */
	if (req->newptr != USER_ADDR_NULL) {
		/*
		 * root can bypass the ktrace check when a flag is set (for
		 * backwards compatibility) or when ownership is maintained over
		 * subsystems resets (to allow the user space process that set
		 * ownership to unset it).
		 */
		if (!((ktrace_root_set_owner_allowed ||
		    ktrace_keep_ownership_on_reset) &&
		    kauth_cred_issuser(kauth_cred_get()))) {
			/* No bypass: must pass the normal configure check. */
			if ((ret = ktrace_configure(KTRACE_KPERF))) {
				ktrace_unlock();
				return ret;
			}
		}
	} else {
		/* Reading the blessed pid only needs ktrace read access. */
		if ((ret = ktrace_read_check())) {
			ktrace_unlock();
			return ret;
		}
	}

	/* which request */
	if ((uintptr_t)arg1 == REQ_BLESS) {
		ret = sysctl_bless(req);
	} else {
		ret = ENOENT;
	}

	ktrace_unlock();

	return ret;
}
620 
/* root kperf node: all controls below live under the `kperf.` namespace */

SYSCTL_NODE(, OID_AUTO, kperf, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "kperf");

/*
 * actions
 *
 * Each OID passes its request ID to kperf_sysctl through arg1.  The size
 * argument (arg2) is informational only -- kperf_sysctl explicitly ignores
 * it (`#pragma unused(oidp, arg2)`).
 */

SYSCTL_NODE(_kperf, OID_AUTO, action, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "action");

SYSCTL_PROC(_kperf_action, OID_AUTO, count,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED |
    CTLFLAG_MASKED,
    (void *)REQ_ACTION_COUNT,
    sizeof(int), kperf_sysctl, "I", "Number of actions");

SYSCTL_PROC(_kperf_action, OID_AUTO, samplers,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_ACTION_SAMPLERS,
    3 * sizeof(uint64_t), kperf_sysctl, "UQ",
    "What to sample when a trigger fires an action");

SYSCTL_PROC(_kperf_action, OID_AUTO, userdata,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_ACTION_USERDATA,
    3 * sizeof(uint64_t), kperf_sysctl, "UQ",
    "User data to attribute to action");

SYSCTL_PROC(_kperf_action, OID_AUTO, filter_by_task,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_ACTION_FILTER_BY_TASK,
    3 * sizeof(uint64_t), kperf_sysctl, "UQ",
    "Apply a task filter to the action");

SYSCTL_PROC(_kperf_action, OID_AUTO, filter_by_pid,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_ACTION_FILTER_BY_PID,
    3 * sizeof(uint64_t), kperf_sysctl, "UQ",
    "Apply a pid filter to the action");

SYSCTL_PROC(_kperf_action, OID_AUTO, ucallstack_depth,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_ACTION_UCALLSTACK_DEPTH,
    sizeof(int), kperf_sysctl, "I",
    "Maximum number of frames to include in user callstacks");

SYSCTL_PROC(_kperf_action, OID_AUTO, kcallstack_depth,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_ACTION_KCALLSTACK_DEPTH,
    sizeof(int), kperf_sysctl, "I",
    "Maximum number of frames to include in kernel callstacks");
672 
/* timers: `kperf.timer.*` controls for the timer-trigger subsystem */

SYSCTL_NODE(_kperf, OID_AUTO, timer, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "timer");

SYSCTL_PROC(_kperf_timer, OID_AUTO, count,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED
    | CTLFLAG_MASKED,
    (void *)REQ_TIMER_COUNT,
    sizeof(int), kperf_sysctl, "I", "Number of time triggers");

SYSCTL_PROC(_kperf_timer, OID_AUTO, period,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_TIMER_PERIOD,
    2 * sizeof(uint64_t), kperf_sysctl, "UQ",
    "Timer number and period");

SYSCTL_PROC(_kperf_timer, OID_AUTO, action,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_TIMER_ACTION,
    2 * sizeof(uint64_t), kperf_sysctl, "UQ",
    "Timer number and actionid");

SYSCTL_PROC(_kperf_timer, OID_AUTO, pet_timer,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED
    | CTLFLAG_MASKED,
    (void *)REQ_TIMER_PET,
    sizeof(int), kperf_sysctl, "I", "Which timer ID does PET");
701 
/* kdebug trigger: `kperf.kdebug.*` controls for kdebug-driven sampling */

SYSCTL_NODE(_kperf, OID_AUTO, kdebug, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "kdebug");

SYSCTL_PROC(_kperf_kdebug, OID_AUTO, action,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED
    | CTLFLAG_MASKED,
    (void*)REQ_KDEBUG_ACTION,
    sizeof(int), kperf_sysctl, "I", "ID of action to trigger on kdebug events");

/* Variable-length filter structure; see sysctl_kdebug_filter. */
SYSCTL_PROC(_kperf_kdebug, OID_AUTO, filter,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void*)REQ_KDEBUG_FILTER,
    sizeof(int), kperf_sysctl, "P", "The filter that determines which kdebug events trigger a sample");
717 
/* lazy sampling: `kperf.lazy.*` wait/CPU threshold-based triggers */

SYSCTL_NODE(_kperf, OID_AUTO, lazy, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "lazy");

SYSCTL_PROC(_kperf_lazy, OID_AUTO, wait_time_threshold,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_LAZY_WAIT_TIME_THRESHOLD,
    sizeof(uint64_t), kperf_sysctl, "UQ",
    "How many ticks a thread must wait to take a sample");

SYSCTL_PROC(_kperf_lazy, OID_AUTO, wait_action,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_LAZY_WAIT_ACTION,
    sizeof(uint64_t), kperf_sysctl, "UQ",
    "Which action to fire when a thread waits longer than threshold");

SYSCTL_PROC(_kperf_lazy, OID_AUTO, cpu_time_threshold,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_LAZY_CPU_TIME_THRESHOLD,
    sizeof(uint64_t), kperf_sysctl, "UQ",
    "Minimum number of ticks a CPU must run between samples");

SYSCTL_PROC(_kperf_lazy, OID_AUTO, cpu_action,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_LAZY_CPU_ACTION,
    sizeof(uint64_t), kperf_sysctl, "UQ",
    "Which action to fire for lazy CPU samples");
746 
/* misc: top-level `kperf.*` controls */

SYSCTL_PROC(_kperf, OID_AUTO, sampling,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED
    | CTLFLAG_MASKED,
    (void *)REQ_SAMPLING,
    sizeof(int), kperf_sysctl, "I", "Sampling running");

SYSCTL_PROC(_kperf, OID_AUTO, reset,
    CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
    (void *)REQ_RESET,
    0, kperf_sysctl, "-", "Reset kperf");

/* Uses its own handler; see kperf_sysctl_bless_handler for the root
 * bypass rules. */
SYSCTL_PROC(_kperf, OID_AUTO, blessed_pid,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED         /* must be root */
    | CTLFLAG_MASKED,
    (void *)REQ_BLESS,
    sizeof(int), kperf_sysctl_bless_handler, "I", "Blessed pid");

SYSCTL_PROC(_kperf, OID_AUTO, blessed_preempt,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED |
    CTLFLAG_MASKED,
    (void *)REQ_BLESS_PREEMPT,
    sizeof(int), kperf_sysctl, "I", "Blessed preemption");

SYSCTL_PROC(_kperf, OID_AUTO, kdbg_cswitch,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED
    | CTLFLAG_MASKED,
    (void *)REQ_KDBG_CSWITCH,
    sizeof(int), kperf_sysctl, "I", "Generate context switch info");

SYSCTL_PROC(_kperf, OID_AUTO, pet_idle_rate,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED
    | CTLFLAG_MASKED,
    (void *)REQ_PET_IDLE_RATE,
    sizeof(int), kperf_sysctl, "I",
    "Rate at which unscheduled threads are forced to be sampled in "
    "PET mode");

SYSCTL_PROC(_kperf, OID_AUTO, lightweight_pet,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED
    | CTLFLAG_MASKED,
    (void *)REQ_LIGHTWEIGHT_PET,
    sizeof(int), kperf_sysctl, "I",
    "Status of lightweight PET mode");
792 
/* limits: read-only `kperf.limits.*` OIDs reporting timer minimums */

SYSCTL_NODE(_kperf, OID_AUTO, limits, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "limits");

/*
 * kperf_sysctl_limits casts arg1 straight to enum kptimer_period_limit and
 * indexes kptimer_minperiods_ns with it, so this ordering must match that
 * enum's.
 */
enum kperf_limit_request {
	REQ_LIM_PERIOD_NS,
	REQ_LIM_BG_PERIOD_NS,
	REQ_LIM_PET_PERIOD_NS,
	REQ_LIM_BG_PET_PERIOD_NS,
};
804 
805 static int
806 kperf_sysctl_limits SYSCTL_HANDLER_ARGS
807 {
808 #pragma unused(oidp, arg2)
809 	enum kptimer_period_limit limit = (enum kptimer_period_limit)arg1;
810 	if (limit >= KTPL_MAX) {
811 		return ENOENT;
812 	}
813 	uint64_t period = kptimer_minperiods_ns[limit];
814 	return sysctl_io_number(req, (long long)period, sizeof(period), &period,
815 	           NULL);
816 }
817 
/* All four minimum-period OIDs share kperf_sysctl_limits, selected by
 * the request ID in arg1. */
SYSCTL_PROC(_kperf_limits, OID_AUTO, timer_min_period_ns,
    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
    (void *)REQ_LIM_PERIOD_NS, sizeof(uint64_t), kperf_sysctl_limits,
    "Q", "Minimum timer period in nanoseconds");
SYSCTL_PROC(_kperf_limits, OID_AUTO, timer_min_bg_period_ns,
    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
    (void *)REQ_LIM_BG_PERIOD_NS, sizeof(uint64_t), kperf_sysctl_limits,
    "Q", "Minimum background timer period in nanoseconds");
SYSCTL_PROC(_kperf_limits, OID_AUTO, timer_min_pet_period_ns,
    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
    (void *)REQ_LIM_PET_PERIOD_NS, sizeof(uint64_t), kperf_sysctl_limits,
    "Q", "Minimum PET timer period in nanoseconds");
SYSCTL_PROC(_kperf_limits, OID_AUTO, timer_min_bg_pet_period_ns,
    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
    (void *)REQ_LIM_BG_PET_PERIOD_NS, sizeof(uint64_t), kperf_sysctl_limits,
    "Q", "Minimum background PET timer period in nanoseconds");
SYSCTL_INT(_kperf_limits, OID_AUTO, max_action_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &kperf_max_actions, 0, "Maximum number of kperf actions");

/* debug */
SYSCTL_INT(_kperf, OID_AUTO, debug_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &kperf_debug_level, 0, "debug level");

#if DEVELOPMENT || DEBUG
SYSCTL_QUAD(_kperf, OID_AUTO, already_pending_ipis,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &kperf_pending_ipis, "");
#endif /* DEVELOPMENT || DEBUG */
846