xref: /xnu-10002.81.5/bsd/kern/subr_log_stream.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <kern/assert.h>
30 #include <kern/bits.h>
31 #include <kern/clock.h>
32 #include <kern/counter.h>
33 #include <kern/cpu_data.h>
34 #include <kern/kalloc.h>
35 #include <kern/locks.h>
36 #include <kern/sched_prim.h>
37 #include <kern/simple_lock.h>
38 #include <libkern/libkern.h>
39 #include <machine/atomic.h>
40 #include <machine/simple_lock.h>
41 #include <os/log_private.h>
42 #include <pexpert/pexpert.h>
43 #include <sys/conf.h>
44 #include <sys/ioctl.h>
45 #include <sys/param.h>
46 #include <sys/sysctl.h>
47 #include <sys/vnode.h>
48 #include <firehose/chunk_private.h>
49 #include <firehose/tracepoint_private.h>
50 
51 #if DEVELOPMENT || DEBUG
52 #define LOG_STREAM_VERIFY
53 #endif /* DEVELOPMENT || DEBUG */
54 
55 #define LOG_MAX_SIZE            (1 << OS_LOG_MAX_SIZE_ORDER)
56 #define LOG_SIZE_VALID(v)       ((v) > 0 && (v) <= LOG_MAX_SIZE)
57 #define LOG_BLK_EXP             (6) // stream/cache block size order (64 bytes)
58 #define LOG_BLK_SIZE            (1 << LOG_BLK_EXP) // block size
59 #define LOG_BLK_HEADER          (1) // log header block map indicator
60 #define LOG_BLK_BODY            (0) // log body block map indicator
61 #define LOG_STREAM_MIN_SIZE     (8 * LOG_MAX_SIZE)
62 #define LOG_STREAM_MAX_SIZE     (1024 * 1024)
63 #define LOG_CACHE_MIN_SIZE      (2 * LOG_MAX_SIZE + LOG_BLK_SIZE)
64 #define LOG_CACHE_MAX_SIZE      (8 * 1024)
65 #define LOG_CACHE_EMPTY         (SIZE_MAX)
66 #define ls_atomic(T)            union { T v; _Atomic(T) av; }
67 
68 __options_decl(log_stream_siopts_t, uint32_t, {
69 	LOG_STREAM_OPT_NBIO     = 0x01,
70 	LOG_STREAM_OPT_ASYNC    = 0x02,
71 	LOG_STREAM_OPT_RDWAIT   = 0x04,
72 });
73 
74 typedef struct {
75 	lck_spin_t          lsi_lock;
76 	log_stream_siopts_t lsi_opts;
77 	struct selinfo      lsi_selinfo;
78 	void                *lsi_channel;
79 } log_stream_si_t;
80 
81 typedef union log_stream_ticket {
82 	struct {
83 		uint64_t lst_size  : 16;
84 		uint64_t lst_loc   : 48;
85 	};
86 	uint64_t lst_value;
87 } log_stream_ticket_t;
88 
89 typedef struct {
90 	uint64_t                        lsm_ts;
91 	struct firehose_tracepoint_s    lsm_ft;
92 } __attribute__((packed)) log_stream_msg_t;
93 
94 typedef struct {
95 	bool                ls_enabled;
96 	bool                ls_snapshot;
97 	uint8_t             *ls_buf;
98 	uint8_t             *ls_blk;
99 	size_t              ls_blk_count;
100 	_Atomic(size_t)     ls_reserved;
101 	ls_atomic(size_t)   ls_commited;
102 	size_t              ls_commited_wraps;
103 } log_stream_t;
104 
105 typedef struct {
106 	log_stream_ticket_t lss_ticket;
107 	log_stream_t        *lss_stream;
108 	log_stream_t        lss_snapshot;
109 } log_stream_session_t;
110 
111 typedef struct {
112 	uint8_t         *lc_buf;
113 	uint8_t         *lc_blk;
114 	size_t          lc_blk_count;
115 	size_t          lc_blk_pos;
116 	log_stream_t    *lc_stream;
117 	size_t          lc_stream_pos;
118 } log_cache_t;
119 
120 TUNABLE(size_t, log_stream_size, "oslog_stream_size", LOG_STREAM_MIN_SIZE);
121 TUNABLE(size_t, log_stream_cache_size, "oslog_stream_csize", LOG_CACHE_MIN_SIZE);
122 #ifdef LOG_STREAM_VERIFY
123 TUNABLE(bool, log_stream_verify, "-oslog_stream_verify", false);
124 #endif /* LOG_STREAM_VERIFY */
125 
126 LCK_GRP_DECLARE(log_stream_lock_grp, "oslog_stream");
127 LCK_MTX_DECLARE(log_stream_lock, &log_stream_lock_grp);
128 
129 #define log_stream_lock()       lck_mtx_lock(&log_stream_lock)
130 #define log_stream_unlock()     lck_mtx_unlock(&log_stream_lock)
131 #define log_stream_si_lock(s)   lck_spin_lock(&(s)->lsi_lock)
132 #define log_stream_si_unlock(s) lck_spin_unlock(&(s)->lsi_lock)
133 
134 SCALABLE_COUNTER_DEFINE(oslog_s_total_msgcount);
135 SCALABLE_COUNTER_DEFINE(oslog_s_metadata_msgcount);
136 SCALABLE_COUNTER_DEFINE(oslog_s_streamed_msgcount);
137 SCALABLE_COUNTER_DEFINE(oslog_s_dropped_msgcount);
138 SCALABLE_COUNTER_DEFINE(oslog_s_error_count);
139 
140 extern d_open_t     oslog_streamopen;
141 extern d_close_t    oslog_streamclose;
142 extern d_read_t     oslog_streamread;
143 extern d_ioctl_t    oslog_streamioctl;
144 extern d_select_t   oslog_streamselect;
145 extern bool         os_log_disabled(void);
146 
147 // This should really go to a suitable internal header file.
148 extern void oslog_stream(bool, firehose_tracepoint_id_u, uint64_t, const void *, size_t);
149 
150 static log_stream_t     log_stream;
151 static log_cache_t      log_stream_cache;
152 static log_stream_si_t  log_stream_si;
153 
154 #ifdef LOG_STREAM_VERIFY
155 static const uint64_t log_stream_msg_tag = 0xaabbccddeeff7788;
156 #endif /* LOG_STREAM_VERIFY */
157 
158 /*
159  * OSLog Streaming
160  *
161  * OSLog streaming has two parts, a log stream buffer and a log stream cache.
162  *
163  * Both, the stream and the cache, are evenly divided into LOG_BLK_SIZE bytes
164  * sized blocks.
165  *
166  * The stream cache allows the reader to access logs quickly without interfering
167  * with writers which log into the stream directly without locking. The reader
168  * refills the cache once it consumes all logs in the cache. Current
169  * implementation supports one reader only (diagnosticd effectively).
170  *
171  * In order to enable/disable the log stream or to access the cache, a log
172  * stream lock (log_stream_lock) has to be taken. This is only for stream
173  * open/close and read syscalls respectively to do.
174  *
175  * OSLog Streaming supports following boot arguments:
176  *
177  * 1) oslog_stream_size=N
178  *    Specifies a log stream buffer size with LOG_STREAM_MIN_SIZE as default and
179  *    [LOG_STREAM_MIN_SIZE, LOG_STREAM_MAX_SIZE] allowed range.
180  *
181  * 2) oslog_stream_csize=N
182  *    Specifies a log stream cache size with LOG_CACHE_MIN_SIZE as minimum and
183  *    default. LOG_STREAM_MAX_SIZE or 1/2 log stream size is maximum, whichever
184  *    is smaller.
185  *
186  * 3) -oslog_stream_verify
187  *    Alters each log timestamp with a tag. This allows to verify that internal
188  *    logic properly stores and retrieves log messages to/from the log stream
189  *    and the cache. Available only on DEBUG/DEVELOPMENT builds.
190  */
191 
192 _Static_assert(LOG_BLK_SIZE > sizeof(log_stream_msg_t),
193     "Invalid log stream block size");
194 
195 /*
196  * An absolute stream cache size minimum, counted in blocks, must accomodate two
197  * complete max sized logs (each 17 blocks large) minus one block. This assert
198  * has to be kept in sync with a stream cache logic, not with LOG_CACHE_MIN_SIZE
199  * definition which can be any number matching assert conditions.
200  */
201 _Static_assert(LOG_CACHE_MIN_SIZE >= (2 * LOG_MAX_SIZE + LOG_BLK_SIZE),
202     "Invalid minimum log stream cache size");
203 
204 _Static_assert(LOG_CACHE_MIN_SIZE < LOG_STREAM_MIN_SIZE / 2,
205     "Minimum log stream cache size is larger than 1/2 of log stream minimum size");
206 
207 static inline size_t
blocks_size(size_t nblocks)208 blocks_size(size_t nblocks)
209 {
210 	return nblocks << LOG_BLK_EXP;
211 }
212 
213 static inline size_t
blocks_count(size_t amount)214 blocks_count(size_t amount)
215 {
216 	return amount >> LOG_BLK_EXP;
217 }
218 
219 static inline size_t
blocks_needed(size_t amount)220 blocks_needed(size_t amount)
221 {
222 	if (amount % LOG_BLK_SIZE == 0) {
223 		return blocks_count(amount);
224 	}
225 	return blocks_count(amount) + 1;
226 }
227 
228 static inline size_t
log_stream_block(const log_stream_t * ls,uint64_t block_seq)229 log_stream_block(const log_stream_t *ls, uint64_t block_seq)
230 {
231 	return block_seq % ls->ls_blk_count;
232 }
233 
234 static inline size_t
log_stream_offset(const log_stream_t * ls,uint64_t block_seq)235 log_stream_offset(const log_stream_t *ls, uint64_t block_seq)
236 {
237 	return blocks_size(log_stream_block(ls, block_seq));
238 }
239 
240 static size_t
log_stream_bytes(const log_stream_t * ls,size_t pos)241 log_stream_bytes(const log_stream_t *ls, size_t pos)
242 {
243 	assert(pos >= ls->ls_commited_wraps);
244 	return pos - ls->ls_commited_wraps;
245 }
246 
247 static size_t
log_stream_written(const log_stream_t * ls,size_t pos)248 log_stream_written(const log_stream_t *ls, size_t pos)
249 {
250 	assert(log_stream_bytes(ls, ls->ls_commited.v) >= pos);
251 	return log_stream_bytes(ls, ls->ls_commited.v) - pos;
252 }
253 
254 /*
255  * Makes a reservation for a given amount of the log stream space. The ticket
256  * allows to determine starting offset (lst_loc) and reserved size (lst_size).
257  * Calling log_stream_reserve() disables preemption and must be paired with
258  * log_stream_commit() once done.
259  */
260 static log_stream_ticket_t
log_stream_reserve(log_stream_t * ls,size_t amount)261 log_stream_reserve(log_stream_t *ls, size_t amount)
262 {
263 	assert(!ls->ls_snapshot);
264 	assert(amount > 0 && amount <= ls->ls_blk_count);
265 
266 	disable_preemption();
267 
268 	log_stream_ticket_t t = {
269 		.lst_size = amount,
270 		.lst_loc = os_atomic_add_orig(&ls->ls_reserved, amount, relaxed)
271 	};
272 	return t;
273 }
274 
275 /*
276  * Finishes the reservation once log stream data are written/read as needed.
277  * Counterpart to log_stream_reserve(), enables preemption.
278  */
279 static void
log_stream_commit(log_stream_t * ls,log_stream_ticket_t ticket)280 log_stream_commit(log_stream_t *ls, log_stream_ticket_t ticket)
281 {
282 	assert(!ls->ls_snapshot);
283 	assert(ticket.lst_size > 0 && ticket.lst_size <= ls->ls_blk_count);
284 
285 	if (ticket.lst_size == ls->ls_blk_count) {
286 		ls->ls_commited_wraps += ls->ls_blk_count;
287 	}
288 	os_atomic_add(&ls->ls_commited.av, ticket.lst_size, release);
289 
290 	enable_preemption();
291 }
292 
293 static void
log_stream_snapshot(const log_stream_t * ls,log_stream_t * snap)294 log_stream_snapshot(const log_stream_t *ls, log_stream_t *snap)
295 {
296 	size_t commited = os_atomic_load(&ls->ls_commited.av, acquire);
297 	*snap = *ls;
298 	snap->ls_commited.v = commited;
299 	snap->ls_snapshot = true;
300 }
301 
302 static uint64_t
log_stream_waitpoint(const log_stream_t * ls,log_stream_ticket_t ticket)303 log_stream_waitpoint(const log_stream_t *ls, log_stream_ticket_t ticket)
304 {
305 	const uint64_t block = log_stream_block(ls, ticket.lst_loc);
306 
307 	if (block + ticket.lst_size > ls->ls_blk_count) {
308 		return ticket.lst_loc;
309 	}
310 	return ticket.lst_loc - block;
311 }
312 
313 static void
log_stream_wait(log_stream_t * ls,log_stream_ticket_t ticket)314 log_stream_wait(log_stream_t *ls, log_stream_ticket_t ticket)
315 {
316 	const uint64_t waitpoint = log_stream_waitpoint(ls, ticket);
317 
318 	size_t commited = os_atomic_load(&ls->ls_commited.av, relaxed);
319 	while (waitpoint > commited) {
320 		commited = hw_wait_while_equals_long(&ls->ls_commited.av, commited);
321 	}
322 }
323 
324 static inline void
log_stream_msg_alter(log_stream_msg_t __unused * msg)325 log_stream_msg_alter(log_stream_msg_t __unused *msg)
326 {
327 #ifdef LOG_STREAM_VERIFY
328 	if (__improbable(log_stream_verify)) {
329 		msg->lsm_ts = log_stream_msg_tag; // alignment tag
330 	}
331 #endif /* LOG_STREAM_VERIFY */
332 }
333 
334 static inline void
log_stream_msg_verify(const log_stream_msg_t __unused * msg)335 log_stream_msg_verify(const log_stream_msg_t __unused *msg)
336 {
337 #ifdef LOG_STREAM_VERIFY
338 	if (__improbable(log_stream_verify)) {
339 		if (msg->lsm_ts != log_stream_msg_tag) {
340 			panic("missing log stream message tag at %p\n", msg);
341 		}
342 	}
343 #endif /* LOG_STREAM_VERIFY */
344 }
345 
346 static size_t
log_stream_msg_size(const log_stream_msg_t * m)347 log_stream_msg_size(const log_stream_msg_t *m)
348 {
349 	assert(LOG_SIZE_VALID(m->lsm_ft.ft_length));
350 	return sizeof(*m) + m->lsm_ft.ft_length;
351 }
352 
353 static size_t
log_stream_msg_make(log_stream_msg_t * msg,firehose_tracepoint_id_u ftid,uint64_t stamp,size_t data_len)354 log_stream_msg_make(log_stream_msg_t *msg, firehose_tracepoint_id_u ftid,
355     uint64_t stamp, size_t data_len)
356 {
357 	msg->lsm_ts = stamp;
358 	msg->lsm_ft.ft_thread = thread_tid(current_thread());
359 	msg->lsm_ft.ft_id.ftid_value = ftid.ftid_value;
360 	msg->lsm_ft.ft_length = data_len;
361 	log_stream_msg_alter(msg);
362 	return blocks_needed(log_stream_msg_size(msg));
363 }
364 
365 static void
log_session_make(log_stream_session_t * session,log_stream_t * ls,log_stream_ticket_t t)366 log_session_make(log_stream_session_t *session, log_stream_t *ls, log_stream_ticket_t t)
367 {
368 	session->lss_ticket = t;
369 	session->lss_stream = ls;
370 	log_stream_snapshot(session->lss_stream, &session->lss_snapshot);
371 }
372 
373 #define log_session_stream(s)   (&(s)->lss_snapshot)
374 #define log_session_loc(s)      ((s)->lss_ticket.lst_loc)
375 #define log_session_size(s)     ((s)->lss_ticket.lst_size)
376 
377 static void
log_session_start(log_stream_t * ls,size_t size,log_stream_session_t * session)378 log_session_start(log_stream_t *ls, size_t size, log_stream_session_t *session)
379 {
380 	assert(!ls->ls_snapshot);
381 
382 	log_stream_ticket_t ticket = log_stream_reserve(ls, size);
383 	log_stream_wait(ls, ticket);
384 	log_session_make(session, ls, ticket);
385 }
386 
387 static void
log_session_finish(log_stream_session_t * session)388 log_session_finish(log_stream_session_t *session)
389 {
390 	log_stream_commit(session->lss_stream, session->lss_ticket);
391 	bzero(session, sizeof(*session));
392 }
393 
394 static size_t
log_stream_avail(log_stream_t * ls,size_t pos)395 log_stream_avail(log_stream_t *ls, size_t pos)
396 {
397 	log_stream_t snap;
398 	log_stream_snapshot(ls, &snap);
399 	return log_stream_written(&snap, pos);
400 }
401 
402 static size_t
rbuf_copy(uint8_t * rb,size_t rb_size,size_t offset,size_t n,const uint8_t * d)403 rbuf_copy(uint8_t *rb, size_t rb_size, size_t offset, size_t n, const uint8_t *d)
404 {
405 	const size_t remains = MIN(n, rb_size - offset);
406 	assert(remains > 0);
407 
408 	(void) memcpy(rb + offset, d, remains);
409 	if (remains < n) {
410 		(void) memcpy(rb, d + remains, n - remains);
411 	}
412 
413 	return (offset + n) % rb_size;
414 }
415 
416 static void
rbuf_set(uint8_t * rb,size_t rb_size,size_t offset,size_t n,uint8_t v)417 rbuf_set(uint8_t *rb, size_t rb_size, size_t offset, size_t n, uint8_t v)
418 {
419 	const size_t remains = MIN(n, rb_size - offset);
420 	assert(remains > 0);
421 
422 	(void) memset(rb + offset, v, remains);
423 	if (remains < n) {
424 		(void) memset(rb, v, n - remains);
425 	}
426 }
427 
428 static void
rbuf_read(const uint8_t * rb,size_t rb_size,size_t offset,size_t n,uint8_t * b)429 rbuf_read(const uint8_t *rb, size_t rb_size, size_t offset, size_t n, uint8_t *b)
430 {
431 	assert(offset < rb_size);
432 	assert(n <= rb_size);
433 
434 	const size_t remains = MIN(n, rb_size - offset);
435 	assert(remains > 0);
436 
437 	memcpy(b, rb + offset, remains);
438 	if (remains < n) {
439 		memcpy(b + remains, rb, n - remains);
440 	}
441 }
442 
443 static size_t
log_stream_write(log_stream_t * ls,size_t offset,size_t n,const uint8_t * buf)444 log_stream_write(log_stream_t *ls, size_t offset, size_t n, const uint8_t *buf)
445 {
446 	return rbuf_copy(ls->ls_buf, blocks_size(ls->ls_blk_count), offset, n, buf);
447 }
448 
449 static void
log_stream_read(log_stream_t * ls,size_t blk_seq,size_t blk_cnt,uint8_t * buf)450 log_stream_read(log_stream_t *ls, size_t blk_seq, size_t blk_cnt, uint8_t *buf)
451 {
452 	rbuf_read(ls->ls_buf, blocks_size(ls->ls_blk_count),
453 	    log_stream_offset(ls, blk_seq), blocks_size(blk_cnt), buf);
454 }
455 
456 static void
log_stream_write_blocks(log_stream_t * ls,size_t blk_seq,size_t blk_count)457 log_stream_write_blocks(log_stream_t *ls, size_t blk_seq, size_t blk_count)
458 {
459 	const size_t blk_id = log_stream_block(ls, blk_seq);
460 	rbuf_set(ls->ls_blk, ls->ls_blk_count, blk_id, blk_count, LOG_BLK_BODY);
461 	ls->ls_blk[blk_id] = LOG_BLK_HEADER;
462 }
463 
464 static void
log_stream_read_blocks(log_stream_t * ls,size_t blk_seq,size_t blk_cnt,uint8_t * buf)465 log_stream_read_blocks(log_stream_t *ls, size_t blk_seq, size_t blk_cnt, uint8_t *buf)
466 {
467 	const size_t blk_id = log_stream_block(ls, blk_seq);
468 	rbuf_read(ls->ls_blk, ls->ls_blk_count, blk_id, blk_cnt, buf);
469 }
470 
471 static void
log_session_store(log_stream_session_t * lss,log_stream_msg_t * msg,const void * msg_data)472 log_session_store(log_stream_session_t *lss, log_stream_msg_t *msg, const void *msg_data)
473 {
474 	log_stream_t *ls = log_session_stream(lss);
475 	size_t blk_seq = log_session_loc(lss);
476 	size_t offset = log_stream_offset(ls, blk_seq);
477 
478 	offset = log_stream_write(ls, offset, sizeof(*msg), (uint8_t *)msg);
479 	(void) log_stream_write(ls, offset, msg->lsm_ft.ft_length, msg_data);
480 	log_stream_write_blocks(ls, blk_seq, log_session_size(lss));
481 }
482 
483 static size_t
log_stream_sync(const log_stream_t * ls,size_t pos)484 log_stream_sync(const log_stream_t *ls, size_t pos)
485 {
486 	assert(ls->ls_snapshot);
487 
488 	const size_t logged = log_stream_written(ls, 0);
489 	assert(pos <= logged);
490 
491 	if (pos + ls->ls_blk_count >= logged) {
492 		return 0;
493 	}
494 	if (__improbable(ls->ls_commited.v < ls->ls_blk_count)) {
495 		return logged - pos;
496 	}
497 	return logged - ls->ls_blk_count - pos;
498 }
499 
500 static bool
log_cache_refill(log_cache_t * lc)501 log_cache_refill(log_cache_t *lc)
502 {
503 	log_stream_t *ls = lc->lc_stream;
504 	assert(ls->ls_enabled);
505 
506 	if (log_stream_avail(ls, lc->lc_stream_pos) == 0) {
507 		return false;
508 	}
509 
510 	log_stream_session_t session;
511 	log_session_start(ls, ls->ls_blk_count, &session);
512 
513 	ls = log_session_stream(&session);
514 	assert(ls->ls_enabled);
515 
516 	if (log_stream_written(ls, lc->lc_stream_pos) == 0) {
517 		log_session_finish(&session);
518 		return false;
519 	}
520 
521 	lc->lc_stream_pos += log_stream_sync(ls, lc->lc_stream_pos);
522 
523 	size_t blk_avail = MIN(log_stream_written(ls, lc->lc_stream_pos),
524 	    lc->lc_blk_count);
525 	assert(blk_avail > 0);
526 
527 	log_stream_read_blocks(ls, lc->lc_stream_pos, blk_avail, lc->lc_blk);
528 	log_stream_read(ls, lc->lc_stream_pos, blk_avail, lc->lc_buf);
529 
530 	log_session_finish(&session);
531 	lc->lc_stream_pos += blk_avail;
532 
533 	return true;
534 }
535 
536 static void
log_stream_si_wakeup_locked(log_stream_si_t * lsi)537 log_stream_si_wakeup_locked(log_stream_si_t *lsi)
538 {
539 	LCK_SPIN_ASSERT(&lsi->lsi_lock, LCK_ASSERT_OWNED);
540 
541 	if (lsi->lsi_channel) {
542 		selwakeup(&lsi->lsi_selinfo);
543 		if (lsi->lsi_opts & LOG_STREAM_OPT_RDWAIT) {
544 			wakeup(lsi->lsi_channel);
545 			lsi->lsi_opts &= ~LOG_STREAM_OPT_RDWAIT;
546 		}
547 	}
548 }
549 
550 static void
log_stream_si_wakeup(log_stream_si_t * lsi)551 log_stream_si_wakeup(log_stream_si_t *lsi)
552 {
553 	static size_t _Atomic delayed_wakeups = 0;
554 
555 	if (!lck_spin_try_lock(&lsi->lsi_lock)) {
556 		os_atomic_inc(&delayed_wakeups, relaxed);
557 		return;
558 	}
559 
560 	log_stream_si_wakeup_locked(lsi);
561 
562 	if (atomic_load(&delayed_wakeups) > 0) {
563 		log_stream_si_wakeup_locked(lsi);
564 		os_atomic_dec(&delayed_wakeups, relaxed);
565 	}
566 
567 	log_stream_si_unlock(lsi);
568 }
569 
570 static void
log_stream_si_record(log_stream_si_t * lsi,void * wql,proc_t p)571 log_stream_si_record(log_stream_si_t *lsi, void *wql, proc_t p)
572 {
573 	log_stream_si_lock(lsi);
574 	assert(lsi->lsi_channel);
575 	selrecord(p, &lsi->lsi_selinfo, wql);
576 	log_stream_si_unlock(lsi);
577 }
578 
579 static void
log_stream_si_enable(log_stream_si_t * lsi,log_stream_t * ls)580 log_stream_si_enable(log_stream_si_t *lsi, log_stream_t *ls)
581 {
582 	log_stream_si_lock(lsi);
583 	assert(!lsi->lsi_channel);
584 	lsi->lsi_channel = (caddr_t)ls;
585 	log_stream_si_unlock(lsi);
586 }
587 
588 static void
log_stream_si_disable(log_stream_si_t * lsi)589 log_stream_si_disable(log_stream_si_t *lsi)
590 {
591 	log_stream_si_lock(lsi);
592 	log_stream_si_wakeup_locked(lsi);
593 	lsi->lsi_opts &= ~(LOG_STREAM_OPT_NBIO | LOG_STREAM_OPT_ASYNC);
594 	selthreadclear(&lsi->lsi_selinfo);
595 	assert(lsi->lsi_channel);
596 	lsi->lsi_channel = (caddr_t)NULL;
597 	log_stream_si_unlock(lsi);
598 }
599 
600 static bool
log_stream_make(log_stream_t * ls,size_t stream_size)601 log_stream_make(log_stream_t *ls, size_t stream_size)
602 {
603 	assert(stream_size >= LOG_STREAM_MIN_SIZE);
604 	assert(stream_size <= LOG_STREAM_MAX_SIZE);
605 
606 	bzero(ls, sizeof(*ls));
607 
608 	ls->ls_blk_count = blocks_count(stream_size);
609 	ls->ls_blk = kalloc_data(ls->ls_blk_count, Z_WAITOK | Z_ZERO);
610 	if (!ls->ls_blk) {
611 		return false;
612 	}
613 
614 	ls->ls_buf = kalloc_data(blocks_size(ls->ls_blk_count),
615 	    Z_WAITOK | Z_ZERO);
616 	if (!ls->ls_buf) {
617 		kfree_data(ls->ls_blk, ls->ls_blk_count);
618 		return false;
619 	}
620 
621 	return true;
622 }
623 
624 static void
log_stream_enable(log_stream_t * tgt,log_stream_t * src)625 log_stream_enable(log_stream_t *tgt, log_stream_t *src)
626 {
627 	/*
628 	 * Never overwrite reservation and commited sequences. Preserving values
629 	 * allows to avoid races between threads when the device gets opened and
630 	 * closed multiple times.
631 	 */
632 	tgt->ls_buf = src->ls_buf;
633 	tgt->ls_blk = src->ls_blk;
634 	bzero(src, sizeof(*src));
635 	tgt->ls_enabled = true;
636 }
637 
638 static void
log_stream_disable(log_stream_t * src,log_stream_t * tgt)639 log_stream_disable(log_stream_t *src, log_stream_t *tgt)
640 {
641 	*tgt = *src;
642 	src->ls_buf = NULL;
643 	src->ls_blk = NULL;
644 	src->ls_enabled = false;
645 }
646 
647 static void
log_stream_teardown(log_stream_t * ls)648 log_stream_teardown(log_stream_t *ls)
649 {
650 	if (ls->ls_buf) {
651 		const size_t buf_size = blocks_size(ls->ls_blk_count);
652 		bzero(ls->ls_buf, buf_size);
653 		kfree_data(ls->ls_buf, buf_size);
654 	}
655 	if (ls->ls_blk) {
656 		kfree_type(uint8_t, ls->ls_blk_count, ls->ls_blk);
657 	}
658 	bzero(ls, sizeof(*ls));
659 }
660 
661 static bool
log_cache_make(log_cache_t * lc,size_t lc_size,log_stream_t * ls)662 log_cache_make(log_cache_t *lc, size_t lc_size, log_stream_t *ls)
663 {
664 	bzero(lc, sizeof(*lc));
665 
666 	lc->lc_blk_count = blocks_count(lc_size);
667 	lc->lc_blk = kalloc_data(lc->lc_blk_count, Z_WAITOK | Z_ZERO);
668 	if (!lc->lc_blk) {
669 		return false;
670 	}
671 
672 	lc->lc_buf = kalloc_data(blocks_size(lc->lc_blk_count), Z_WAITOK | Z_ZERO);
673 	if (!lc->lc_buf) {
674 		kfree_data(lc->lc_blk, lc->lc_blk_count);
675 		return false;
676 	}
677 
678 	lc->lc_stream = ls;
679 	lc->lc_stream_pos = log_stream_written(ls, 0);
680 
681 	return true;
682 }
683 
684 static void
log_cache_move(log_cache_t * src,log_cache_t * tgt)685 log_cache_move(log_cache_t *src, log_cache_t *tgt)
686 {
687 	*tgt = *src;
688 	bzero(src, sizeof(*src));
689 }
690 
691 static void
log_cache_teardown(log_cache_t * lc)692 log_cache_teardown(log_cache_t *lc)
693 {
694 	if (lc->lc_blk) {
695 		kfree_data(lc->lc_blk, lc->lc_blk_count);
696 	}
697 	if (lc->lc_buf) {
698 		kfree_data(lc->lc_buf, blocks_size(lc->lc_blk_count));
699 	}
700 	bzero(lc, sizeof(*lc));
701 }
702 
703 static void
log_cache_rewind(log_cache_t * lc)704 log_cache_rewind(log_cache_t *lc)
705 {
706 	bzero(lc->lc_blk, lc->lc_blk_count);
707 	bzero(lc->lc_buf, blocks_size(lc->lc_blk_count));
708 	lc->lc_blk_pos = 0;
709 }
710 
711 static void
log_cache_consume(log_cache_t * lc,size_t amount)712 log_cache_consume(log_cache_t *lc, size_t amount)
713 {
714 	lc->lc_blk_pos += blocks_needed(amount);
715 }
716 
717 static size_t
log_cache_next_msg(log_cache_t * lc)718 log_cache_next_msg(log_cache_t *lc)
719 {
720 	assert(lc->lc_blk_pos <= lc->lc_blk_count);
721 
722 	for (size_t n = lc->lc_blk_pos; n < lc->lc_blk_count; n++) {
723 		if (lc->lc_blk[n] == LOG_BLK_HEADER) {
724 			lc->lc_blk_pos = n;
725 			return lc->lc_blk_pos;
726 		}
727 	}
728 	return LOG_CACHE_EMPTY;
729 }
730 
731 static log_stream_msg_t *
log_cache_msg(const log_cache_t * lc,size_t blk_id,size_t * msg_size)732 log_cache_msg(const log_cache_t *lc, size_t blk_id, size_t *msg_size)
733 {
734 	assert(blk_id != LOG_CACHE_EMPTY);
735 	assert(blk_id < lc->lc_blk_count);
736 
737 	log_stream_msg_t *msg =
738 	    (log_stream_msg_t *)&lc->lc_buf[blocks_size(blk_id)];
739 
740 	if (!LOG_SIZE_VALID(msg->lsm_ft.ft_length)) {
741 		*msg_size = 0;
742 		return NULL;
743 	}
744 
745 	*msg_size = log_stream_msg_size(msg);
746 	return msg;
747 }
748 
749 static bool
log_cache_get(log_cache_t * lc,log_stream_msg_t ** log,size_t * log_size)750 log_cache_get(log_cache_t *lc, log_stream_msg_t **log, size_t *log_size)
751 {
752 	size_t log_index = log_cache_next_msg(lc);
753 
754 	/*
755 	 * Find a next message. If the message is cached partially, seek the
756 	 * cursor back to the message beginning and refill the cache. Refill if
757 	 * the cache is empty.
758 	 */
759 	if (log_index != LOG_CACHE_EMPTY) {
760 		*log = log_cache_msg(lc, log_index, log_size);
761 		assert(*log && *log_size > 0);
762 		size_t remains = lc->lc_blk_count - log_index;
763 		if (*log_size <= blocks_size(remains)) {
764 			return true;
765 		}
766 		lc->lc_stream_pos -= remains;
767 	}
768 	log_cache_rewind(lc);
769 
770 	if (log_cache_refill(lc)) {
771 		*log = log_cache_msg(lc, log_cache_next_msg(lc), log_size);
772 		return true;
773 	}
774 	return false;
775 }
776 
777 static int
handle_no_logs(log_stream_t * ls,int flag)778 handle_no_logs(log_stream_t *ls, int flag)
779 {
780 	if (flag & IO_NDELAY) {
781 		return EWOULDBLOCK;
782 	}
783 
784 	log_stream_si_lock(&log_stream_si);
785 	if (log_stream_si.lsi_opts & LOG_STREAM_OPT_NBIO) {
786 		log_stream_si_unlock(&log_stream_si);
787 		return EWOULDBLOCK;
788 	}
789 	log_stream_si.lsi_opts |= LOG_STREAM_OPT_RDWAIT;
790 	log_stream_si_unlock(&log_stream_si);
791 
792 	wait_result_t wr = assert_wait((event_t)ls, THREAD_INTERRUPTIBLE);
793 	if (wr == THREAD_WAITING) {
794 		wr = thread_block(THREAD_CONTINUE_NULL);
795 	}
796 
797 	return wr == THREAD_AWAKENED || wr == THREAD_TIMED_OUT ? 0 : EINTR;
798 }
799 
800 void
oslog_stream(bool is_metadata,firehose_tracepoint_id_u ftid,uint64_t stamp,const void * data,size_t datalen)801 oslog_stream(bool is_metadata, firehose_tracepoint_id_u ftid, uint64_t stamp,
802     const void *data, size_t datalen)
803 {
804 	if (!log_stream.ls_enabled) {
805 		counter_inc(&oslog_s_dropped_msgcount);
806 		return;
807 	}
808 
809 	if (__improbable(!oslog_is_safe())) {
810 		counter_inc(&oslog_s_dropped_msgcount);
811 		return;
812 	}
813 
814 	if (__improbable(is_metadata)) {
815 		counter_inc(&oslog_s_metadata_msgcount);
816 	} else {
817 		counter_inc(&oslog_s_total_msgcount);
818 	}
819 
820 	if (__improbable(!LOG_SIZE_VALID(datalen))) {
821 		counter_inc(&oslog_s_error_count);
822 		return;
823 	}
824 
825 	log_stream_msg_t msg;
826 	size_t msg_size = log_stream_msg_make(&msg, ftid, stamp, datalen);
827 
828 	log_stream_session_t session;
829 	log_session_start(&log_stream, msg_size, &session);
830 
831 	// Check again, the state may have changed.
832 	if (!log_session_stream(&session)->ls_enabled) {
833 		log_session_finish(&session);
834 		counter_inc(&oslog_s_dropped_msgcount);
835 		return;
836 	}
837 
838 	log_session_store(&session, &msg, data);
839 	log_session_finish(&session);
840 	log_stream_si_wakeup(&log_stream_si);
841 }
842 
843 int
oslog_streamread(dev_t dev,struct uio * uio,int flag)844 oslog_streamread(dev_t dev, struct uio *uio, int flag)
845 {
846 	log_stream_msg_t *log = NULL;
847 	size_t log_size = 0;
848 	int error;
849 
850 	if (minor(dev) != 0) {
851 		return ENXIO;
852 	}
853 
854 	log_stream_lock();
855 
856 	if (!log_stream.ls_enabled) {
857 		log_stream_unlock();
858 		return ENXIO;
859 	}
860 
861 	while (!log_cache_get(&log_stream_cache, &log, &log_size)) {
862 		log_stream_unlock();
863 		if ((error = handle_no_logs(&log_stream, flag))) {
864 			return error;
865 		}
866 		log_stream_lock();
867 		if (!log_stream.ls_enabled) {
868 			log_stream_unlock();
869 			return ENXIO;
870 		}
871 	}
872 	assert(log);
873 	assert(log_size > 0);
874 
875 	log_stream_msg_verify(log);
876 
877 	if (log_size > MIN(uio_resid(uio), INT_MAX)) {
878 		log_stream_unlock();
879 		counter_inc(&oslog_s_error_count);
880 		return ENOBUFS;
881 	}
882 
883 	error = uiomove((caddr_t)log, (int)log_size, uio);
884 	if (!error) {
885 		log_cache_consume(&log_stream_cache, log_size);
886 		counter_inc(&oslog_s_streamed_msgcount);
887 	} else {
888 		counter_inc(&oslog_s_error_count);
889 	}
890 
891 	log_stream_unlock();
892 
893 	return error;
894 }
895 
896 int
oslog_streamselect(dev_t dev,int rw,void * wql,proc_t p)897 oslog_streamselect(dev_t dev, int rw, void *wql, proc_t p)
898 {
899 	if (minor(dev) != 0 || rw != FREAD) {
900 		return 0;
901 	}
902 
903 	bool new_logs = true;
904 
905 	log_stream_lock();
906 	if (log_cache_next_msg(&log_stream_cache) == LOG_CACHE_EMPTY &&
907 	    log_stream_avail(&log_stream, log_stream_cache.lc_stream_pos) == 0) {
908 		log_stream_si_record(&log_stream_si, wql, p);
909 		new_logs = false;
910 	}
911 	log_stream_unlock();
912 
913 	return new_logs;
914 }
915 
916 int
oslog_streamioctl(dev_t dev,u_long com,caddr_t data,__unused int flag,__unused struct proc * p)917 oslog_streamioctl(dev_t dev, u_long com, caddr_t data, __unused int flag,
918     __unused struct proc *p)
919 {
920 	if (minor(dev) != 0) {
921 		return ENXIO;
922 	}
923 
924 	log_stream_siopts_t opt = 0;
925 
926 	switch (com) {
927 	case FIOASYNC:
928 		opt = LOG_STREAM_OPT_ASYNC;
929 		break;
930 	case FIONBIO:
931 		opt = LOG_STREAM_OPT_NBIO;
932 		break;
933 	default:
934 		return ENOTTY;
935 	}
936 
937 	int data_value = 0;
938 	if (data) {
939 		bcopy(data, &data_value, sizeof(data_value));
940 	}
941 
942 	log_stream_lock();
943 	log_stream_si_lock(&log_stream_si);
944 	assert(log_stream.ls_enabled);
945 
946 	if (data_value) {
947 		log_stream_si.lsi_opts |= opt;
948 	} else {
949 		log_stream_si.lsi_opts &= ~opt;
950 	}
951 
952 	log_stream_si_unlock(&log_stream_si);
953 	log_stream_unlock();
954 
955 	return 0;
956 }
957 
958 int
oslog_streamopen(dev_t dev,__unused int flags,__unused int mode,__unused struct proc * p)959 oslog_streamopen(dev_t dev, __unused int flags, __unused int mode,
960     __unused struct proc *p)
961 {
962 	if (minor(dev) != 0) {
963 		return ENXIO;
964 	}
965 
966 	log_stream_t bringup_ls;
967 	if (!log_stream_make(&bringup_ls, log_stream_size)) {
968 		return ENOMEM;
969 	}
970 
971 	log_cache_t bringup_lsc;
972 	if (!log_cache_make(&bringup_lsc, log_stream_cache_size, &log_stream)) {
973 		log_stream_teardown(&bringup_ls);
974 		return ENOMEM;
975 	}
976 
977 	log_stream_lock();
978 
979 	if (log_stream.ls_enabled) {
980 		log_stream_unlock();
981 		log_stream_teardown(&bringup_ls);
982 		log_cache_teardown(&bringup_lsc);
983 		return EBUSY;
984 	}
985 
986 	log_stream_session_t session;
987 	log_session_start(&log_stream, log_stream.ls_blk_count, &session);
988 
989 	log_stream_enable(&log_stream, &bringup_ls);
990 	log_session_finish(&session);
991 
992 	log_cache_move(&bringup_lsc, &log_stream_cache);
993 	log_stream_si_enable(&log_stream_si, &log_stream);
994 	log_stream_unlock();
995 
996 	return 0;
997 }
998 
999 int
oslog_streamclose(dev_t dev,__unused int flag,__unused int devtype,__unused struct proc * p)1000 oslog_streamclose(dev_t dev, __unused int flag, __unused int devtype, __unused struct proc *p)
1001 {
1002 	if (minor(dev) != 0) {
1003 		return ENXIO;
1004 	}
1005 
1006 	log_stream_lock();
1007 
1008 	if (!log_stream.ls_enabled) {
1009 		log_stream_unlock();
1010 		return ENXIO;
1011 	}
1012 
1013 	log_stream_si_disable(&log_stream_si);
1014 
1015 	log_stream_session_t session;
1016 	log_session_start(&log_stream, log_stream.ls_blk_count, &session);
1017 
1018 	log_stream_t teardown_ls;
1019 	log_stream_disable(&log_stream, &teardown_ls);
1020 	log_session_finish(&session);
1021 
1022 	log_cache_t teardown_lsc;
1023 	log_cache_move(&log_stream_cache, &teardown_lsc);
1024 	log_stream_unlock();
1025 
1026 	log_stream_teardown(&teardown_ls);
1027 	log_cache_teardown(&teardown_lsc);
1028 
1029 	return 0;
1030 }
1031 
1032 __startup_func
1033 static void
oslog_stream_init(void)1034 oslog_stream_init(void)
1035 {
1036 	if (os_log_disabled()) {
1037 		printf("OSLog stream disabled: Logging disabled by ATM\n");
1038 		return;
1039 	}
1040 
1041 	log_stream_size = MAX(log_stream_size, LOG_STREAM_MIN_SIZE);
1042 	log_stream_size = MIN(log_stream_size, LOG_STREAM_MAX_SIZE);
1043 
1044 	log_stream_cache_size = MAX(log_stream_cache_size, LOG_CACHE_MIN_SIZE);
1045 	log_stream_cache_size = MIN(log_stream_cache_size, LOG_CACHE_MAX_SIZE);
1046 	log_stream_cache_size = MIN(log_stream_cache_size, log_stream_size / 2);
1047 
1048 	log_stream.ls_blk_count = blocks_count(log_stream_size);
1049 	lck_spin_init(&log_stream_si.lsi_lock, &log_stream_lock_grp,
1050 	    LCK_ATTR_NULL);
1051 
1052 	printf("OSLog stream configured: stream: %lu bytes, cache: %lu bytes\n",
1053 	    log_stream_size, log_stream_cache_size);
1054 #ifdef LOG_STREAM_VERIFY
1055 	printf("OSLog stream verification: %d\n", log_stream_verify);
1056 #endif /* LOG_STREAM_VERIFY */
1057 }
1058 STARTUP(OSLOG, STARTUP_RANK_SECOND, oslog_stream_init);
1059