xref: /xnu-10002.41.9/bsd/vfs/vfs_fsevents.c (revision 699cd48037512bf4380799317ca44ca453c82f57)
1 /*
2  * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #include <stdarg.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/event.h>         // for kqueue related stuff
32 #include <sys/fsevents.h>
33 
34 #if CONFIG_FSE
35 #include <sys/namei.h>
36 #include <sys/filedesc.h>
37 #include <sys/kernel.h>
38 #include <sys/file_internal.h>
39 #include <sys/stat.h>
40 #include <sys/vnode_internal.h>
41 #include <sys/mount_internal.h>
42 #include <sys/proc_internal.h>
43 #include <sys/kauth.h>
44 #include <sys/uio.h>
45 #include <kern/kalloc.h>
46 #include <sys/dirent.h>
47 #include <sys/attr.h>
48 #include <sys/sysctl.h>
49 #include <sys/ubc.h>
50 #include <machine/cons.h>
51 #include <miscfs/specfs/specdev.h>
52 #include <miscfs/devfs/devfs.h>
53 #include <sys/filio.h>
54 #include <kern/locks.h>
55 #include <libkern/OSAtomic.h>
56 #include <kern/zalloc.h>
57 #include <mach/mach_time.h>
58 #include <kern/thread_call.h>
59 #include <kern/clock.h>
60 #include <IOKit/IOBSD.h>
61 
62 #include <security/audit/audit.h>
63 #include <bsm/audit_kevents.h>
64 
65 #include <pexpert/pexpert.h>
66 #include <libkern/section_keywords.h>
67 
68 typedef struct kfs_event {
69 	LIST_ENTRY(kfs_event) kevent_list;
70 	uint64_t       abstime;    // when this event happened (mach_absolute_time())
71 	int16_t        type;       // type code of this event
72 	uint16_t       flags;      // per-event flags
73 	int32_t        refcount;   // number of clients referencing this
74 	pid_t          pid;
75 	int32_t        spare;
76 
77 	union {
78 		struct regular_event {
79 			// This must match the layout of fse_info
80 			// exactly, except for the "nlink" field is
81 			// not included here.  See copy_out_kfse()
82 			// for all of the sordid details, and also
83 			// the _Static_assert() statements below.
84 			ino64_t          ino;
85 			dev_t            dev;
86 			int32_t          mode;
87 			uid_t            uid;
88 			uint32_t         document_id;
89 			struct kfs_event *dest; // if this is a two-file op
90 			const char       *str;
91 			uint16_t         len;
92 		} regular_event;
93 
94 		struct {
95 			ino64_t          src_ino;
96 			ino64_t          dst_ino;
97 			uint64_t         docid;
98 			dev_t            dev;
99 		} docid_event;
100 
101 		struct {
102 			audit_token_t    audit_token;
103 			const char       *str;
104 			uint16_t         len;
105 		} access_granted_event;
106 	};
107 } kfs_event;
108 
109 _Static_assert(offsetof(struct regular_event, ino) == offsetof(fse_info, ino),
110     "kfs_event and fse_info out-of-sync");
111 _Static_assert(offsetof(struct regular_event, dev) == offsetof(fse_info, dev),
112     "kfs_event and fse_info out-of-sync");
113 _Static_assert(offsetof(struct regular_event, mode) == offsetof(fse_info, mode),
114     "kfs_event and fse_info out-of-sync");
115 _Static_assert(offsetof(struct regular_event, uid) == offsetof(fse_info, uid),
116     "kfs_event and fse_info out-of-sync");
117 _Static_assert(offsetof(struct regular_event, document_id) == offsetof(fse_info, document_id),
118     "kfs_event and fse_info out-of-sync");
119 
120 #define KFSE_INFO_COPYSIZE offsetof(fse_info, nlink)
121 
122 // flags for the flags field
123 #define KFSE_COMBINED_EVENTS          0x0001
124 #define KFSE_CONTAINS_DROPPED_EVENTS  0x0002
125 #define KFSE_ON_LIST                  0x0004
126 #define KFSE_BEING_CREATED            0x0008
127 
128 LIST_HEAD(kfse_list, kfs_event) kfse_list_head = LIST_HEAD_INITIALIZER(x);
129 int num_events_outstanding = 0;
130 int num_pending_rename = 0;
131 
132 
133 struct fsevent_handle;
134 
135 typedef struct fs_event_watcher {
136 	int8_t      *event_list;         // the events we're interested in
137 	int32_t      num_events;
138 	dev_t       *devices_not_to_watch;// report events from devices not in this list
139 	uint32_t     num_devices;
140 	int32_t      flags;
141 	kfs_event  **event_queue;
142 	int32_t      eventq_size;        // number of event pointers in queue
143 	int32_t      num_readers;
144 	int32_t      rd;                 // read index into the event_queue
145 	int32_t      wr;                 // write index into the event_queue
146 	int32_t      blockers;
147 	int32_t      my_id;
148 	uint32_t     num_dropped;
149 	uint64_t     max_event_id;
150 	struct fsevent_handle *fseh;
151 	pid_t        pid;
152 	char         proc_name[(2 * MAXCOMLEN) + 1];
153 } fs_event_watcher;
154 
155 // fs_event_watcher flags
156 #define WATCHER_DROPPED_EVENTS         0x0001
157 #define WATCHER_CLOSING                0x0002
158 #define WATCHER_WANTS_COMPACT_EVENTS   0x0004
159 #define WATCHER_WANTS_EXTENDED_INFO    0x0008
160 #define WATCHER_APPLE_SYSTEM_SERVICE   0x0010   // fseventsd, coreservicesd, mds, revisiond
161 
162 #define MAX_WATCHERS  8
163 static fs_event_watcher *watcher_table[MAX_WATCHERS];
164 
165 #define DEFAULT_MAX_KFS_EVENTS   4096
166 static int max_kfs_events = DEFAULT_MAX_KFS_EVENTS;
167 
168 // we allocate kfs_event structures out of this zone
169 static zone_t     event_zone;
170 static int        fs_event_init = 0;
171 
172 //
173 // this array records whether anyone is interested in a
174 // particular type of event.  if no one is, we bail out
175 // early from the event delivery
176 //
177 static int16_t     fs_event_type_watchers[FSE_MAX_EVENTS];
178 
179 // the device currently being unmounted:
180 static dev_t fsevent_unmount_dev = 0;
181 // how many ACKs are still outstanding:
182 static int fsevent_unmount_ack_count = 0;
183 
184 static int  watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse);
185 static void fsevents_wakeup(fs_event_watcher *watcher);
186 
187 //
188 // Locks
189 //
190 static LCK_ATTR_DECLARE(fsevent_lock_attr, 0, 0);
191 static LCK_GRP_DECLARE(fsevent_mutex_group, "fsevent-mutex");
192 static LCK_GRP_DECLARE(fsevent_rw_group, "fsevent-rw");
193 
194 static LCK_RW_DECLARE_ATTR(event_handling_lock, // handles locking for event manipulation and recycling
195     &fsevent_rw_group, &fsevent_lock_attr);
196 static LCK_MTX_DECLARE_ATTR(watch_table_lock,
197     &fsevent_mutex_group, &fsevent_lock_attr);
198 static LCK_MTX_DECLARE_ATTR(event_buf_lock,
199     &fsevent_mutex_group, &fsevent_lock_attr);
200 static LCK_MTX_DECLARE_ATTR(event_writer_lock,
201     &fsevent_mutex_group, &fsevent_lock_attr);
202 
203 
204 /* Explicitly declare qsort so compiler doesn't complain */
205 __private_extern__ void qsort(
206 	void * array,
207 	size_t nmembers,
208 	size_t member_size,
209 	int (*)(const void *, const void *));
210 
211 static int
is_ignored_directory(const char * path)212 is_ignored_directory(const char *path)
213 {
214 	if (!path) {
215 		return 0;
216 	}
217 
218 #define IS_TLD(x) strnstr(__DECONST(char *, path), x, MAXPATHLEN)
219 	if (IS_TLD("/.Spotlight-V100/") ||
220 	    IS_TLD("/.MobileBackups/") ||
221 	    IS_TLD("/Backups.backupdb/")) {
222 		return 1;
223 	}
224 #undef IS_TLD
225 
226 	return 0;
227 }
228 
229 static void
fsevents_internal_init(void)230 fsevents_internal_init(void)
231 {
232 	int i;
233 
234 	if (fs_event_init++ != 0) {
235 		return;
236 	}
237 
238 	for (i = 0; i < FSE_MAX_EVENTS; i++) {
239 		fs_event_type_watchers[i] = 0;
240 	}
241 
242 	memset(watcher_table, 0, sizeof(watcher_table));
243 
244 	PE_get_default("kern.maxkfsevents", &max_kfs_events, sizeof(max_kfs_events));
245 
246 	event_zone = zone_create_ext("fs-event-buf", sizeof(kfs_event),
247 	    ZC_NOGC | ZC_NOCALLOUT, ZONE_ID_ANY, ^(zone_t z) {
248 		// mark the zone as exhaustible so that it will not
249 		// ever grow beyond what we initially filled it with
250 		zone_set_exhaustible(z, max_kfs_events, /* exhausts */ true);
251 	});
252 
253 	zone_fill_initially(event_zone, max_kfs_events);
254 }
255 
256 static void
lock_watch_table(void)257 lock_watch_table(void)
258 {
259 	lck_mtx_lock(&watch_table_lock);
260 }
261 
262 static void
unlock_watch_table(void)263 unlock_watch_table(void)
264 {
265 	lck_mtx_unlock(&watch_table_lock);
266 }
267 
268 static void
lock_fs_event_list(void)269 lock_fs_event_list(void)
270 {
271 	lck_mtx_lock(&event_buf_lock);
272 }
273 
274 static void
unlock_fs_event_list(void)275 unlock_fs_event_list(void)
276 {
277 	lck_mtx_unlock(&event_buf_lock);
278 }
279 
280 // forward prototype
281 static void release_event_ref(kfs_event *kfse);
282 
283 static boolean_t
watcher_cares_about_dev(fs_event_watcher * watcher,dev_t dev)284 watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev)
285 {
286 	unsigned int i;
287 
288 	// if devices_not_to_watch is NULL then we care about all
289 	// events from all devices
290 	if (watcher->devices_not_to_watch == NULL) {
291 		return true;
292 	}
293 
294 	for (i = 0; i < watcher->num_devices; i++) {
295 		if (dev == watcher->devices_not_to_watch[i]) {
296 			// found a match! that means we do not
297 			// want events from this device.
298 			return false;
299 		}
300 	}
301 
302 	// if we're here it's not in the devices_not_to_watch[]
303 	// list so that means we do care about it
304 	return true;
305 }
306 
307 
308 int
need_fsevent(int type,vnode_t vp)309 need_fsevent(int type, vnode_t vp)
310 {
311 	if (type >= 0 && type < FSE_MAX_EVENTS && fs_event_type_watchers[type] == 0) {
312 		return 0;
313 	}
314 
315 	// events in /dev aren't really interesting...
316 	if (vp->v_tag == VT_DEVFS) {
317 		return 0;
318 	}
319 
320 	return 1;
321 }
322 
323 
324 #define is_throw_away(x)  ((x) == FSE_STAT_CHANGED || (x) == FSE_CONTENT_MODIFIED)
325 
326 
327 int num_dropped         = 0;
328 
329 static struct timeval last_print;
330 
331 //
332 // These variables are used to track coalescing multiple identical
333 // events for the same vnode/pathname.  If we get the same event
334 // type and same vnode/pathname as the previous event, we just drop
335 // the event since it's superfluous.  This improves some micro-
336 // benchmarks considerably and actually has a real-world impact on
337 // tests like a Finder copy where multiple stat-changed events can
338 // get coalesced.
339 //
340 static int     last_event_type = -1;
341 static void   *last_ptr = NULL;
342 static char    last_str[MAXPATHLEN];
343 static int     last_nlen = 0;
344 static int     last_vid = -1;
345 static uint64_t last_coalesced_time = 0;
346 static void   *last_event_ptr = NULL;
347 static pid_t last_pid = -1;
348 int            last_coalesced = 0;
349 static mach_timebase_info_data_t    sTimebaseInfo = { 0, 0 };
350 
351 #define MAX_HARDLINK_NOTIFICATIONS 128
352 
353 static inline void
kfse_init(kfs_event * kfse,int type,uint64_t time,proc_t p)354 kfse_init(kfs_event *kfse, int type, uint64_t time, proc_t p)
355 {
356 	memset(kfse, 0, sizeof(*kfse));
357 	kfse->refcount = 1;
358 	kfse->type =     (int16_t)type;
359 	kfse->abstime =  time;
360 	kfse->pid =      proc_getpid(p);
361 
362 	OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse->flags);
363 }
364 
365 int
add_fsevent(int type,vfs_context_t ctx,...)366 add_fsevent(int type, vfs_context_t ctx, ...)
367 {
368 	struct proc      *p = vfs_context_proc(ctx);
369 	int               i, arg_type, ret;
370 	kfs_event        *kfse, *kfse_dest = NULL, *cur;
371 	fs_event_watcher *watcher;
372 	va_list           ap;
373 	int               error = 0, did_alloc = 0;
374 	int64_t           orig_linkcount = -1;
375 	dev_t             dev = 0;
376 	uint64_t          now, elapsed;
377 	uint64_t          orig_linkid = 0, next_linkid = 0;
378 	uint64_t          link_parentid = 0;
379 	char             *pathbuff = NULL, *path_override = NULL;
380 	char              *link_name = NULL;
381 	vnode_t           link_vp = NULL;
382 	int               pathbuff_len = 0;
383 	uthread_t         ut = get_bsdthread_info(current_thread());
384 	bool              do_all_links = true;
385 	bool              do_cache_reset = false;
386 
387 	if (type == FSE_CONTENT_MODIFIED_NO_HLINK) {
388 		do_all_links = false;
389 		type = FSE_CONTENT_MODIFIED;
390 	}
391 
392 
393 restart:
394 	va_start(ap, ctx);
395 
396 	// ignore bogus event types..
397 	if (type < 0 || type >= FSE_MAX_EVENTS) {
398 		return EINVAL;
399 	}
400 
401 	// if no one cares about this type of event, bail out
402 	if (fs_event_type_watchers[type] == 0) {
403 		va_end(ap);
404 
405 		return 0;
406 	}
407 
408 	now = mach_absolute_time();
409 
410 	// find a free event and snag it for our use
411 	// NOTE: do not do anything that would block until
412 	//       the lock is dropped.
413 	lock_fs_event_list();
414 
415 	//
416 	// check if this event is identical to the previous one...
417 	// (as long as it's not an event type that can never be the
418 	// same as a previous event)
419 	//
420 	if (path_override == NULL &&
421 	    type != FSE_CREATE_FILE &&
422 	    type != FSE_DELETE &&
423 	    type != FSE_RENAME &&
424 	    type != FSE_EXCHANGE &&
425 	    type != FSE_CHOWN &&
426 	    type != FSE_DOCID_CHANGED &&
427 	    type != FSE_DOCID_CREATED &&
428 	    type != FSE_CLONE &&
429 	    // don't coalesce FSE_ACCESS_GRANTED because it could
430 	    // have been granted to a different process.
431 	    type != FSE_ACCESS_GRANTED) {
432 		void *ptr = NULL;
433 		int   vid = 0, was_str = 0, nlen = 0;
434 
435 		for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
436 			switch (arg_type) {
437 			case FSE_ARG_VNODE: {
438 				ptr = va_arg(ap, void *);
439 				vid = vnode_vid((struct vnode *)ptr);
440 				last_str[0] = '\0';
441 				break;
442 			}
443 			case FSE_ARG_STRING: {
444 				nlen = va_arg(ap, int32_t);
445 				ptr = va_arg(ap, void *);
446 				was_str = 1;
447 				break;
448 			}
449 			}
450 			if (ptr != NULL) {
451 				break;
452 			}
453 		}
454 
455 		if (sTimebaseInfo.denom == 0) {
456 			(void) clock_timebase_info(&sTimebaseInfo);
457 		}
458 
459 		elapsed = (now - last_coalesced_time);
460 		if (sTimebaseInfo.denom != sTimebaseInfo.numer) {
461 			if (sTimebaseInfo.denom == 1) {
462 				elapsed *= sTimebaseInfo.numer;
463 			} else {
464 				// this could overflow... the worst that will happen is that we'll
465 				// send (or not send) an extra event so I'm not going to worry about
466 				// doing the math right like dtrace_abs_to_nano() does.
467 				elapsed = (elapsed * sTimebaseInfo.numer) / (uint64_t)sTimebaseInfo.denom;
468 			}
469 		}
470 
471 		if (type == last_event_type
472 		    && (elapsed < 1000000000)
473 		    && (last_pid == proc_getpid(p))
474 		    &&
475 		    ((vid && vid == last_vid && last_ptr == ptr)
476 		    ||
477 		    (last_str[0] && last_nlen == nlen && ptr && strcmp(last_str, ptr) == 0))
478 		    ) {
479 			last_coalesced++;
480 			unlock_fs_event_list();
481 			va_end(ap);
482 
483 			return 0;
484 		} else {
485 			last_ptr = ptr;
486 			if (ptr && was_str) {
487 				strlcpy(last_str, ptr, sizeof(last_str));
488 			}
489 			last_nlen = nlen;
490 			last_vid = vid;
491 			last_event_type = type;
492 			last_coalesced_time = now;
493 			last_pid = proc_getpid(p);
494 		}
495 	}
496 	va_start(ap, ctx);
497 
498 
499 	kfse = zalloc_noblock(event_zone);
500 	if (kfse && (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE)) {
501 		kfse_dest = zalloc_noblock(event_zone);
502 		if (kfse_dest == NULL) {
503 			did_alloc = 1;
504 			zfree(event_zone, kfse);
505 			kfse = NULL;
506 		}
507 	}
508 
509 
510 	if (kfse == NULL) {    // yikes! no free events
511 		unlock_fs_event_list();
512 		lock_watch_table();
513 
514 		for (i = 0; i < MAX_WATCHERS; i++) {
515 			watcher = watcher_table[i];
516 			if (watcher == NULL) {
517 				continue;
518 			}
519 
520 			watcher->flags |= WATCHER_DROPPED_EVENTS;
521 			fsevents_wakeup(watcher);
522 		}
523 		unlock_watch_table();
524 
525 		{
526 			struct timeval current_tv;
527 
528 			num_dropped++;
529 
			// only print a message if more than 10 seconds have passed since the last one
531 			microuptime(&current_tv);
532 			if ((current_tv.tv_sec - last_print.tv_sec) > 10) {
533 				int ii;
534 				void *junkptr = zalloc_noblock(event_zone), *listhead = kfse_list_head.lh_first;
535 
536 				printf("add_fsevent: event queue is full! dropping events (num dropped events: %d; num events outstanding: %d).\n", num_dropped, num_events_outstanding);
537 				printf("add_fsevent: kfse_list head %p ; num_pending_rename %d\n", listhead, num_pending_rename);
538 				printf("add_fsevent: zalloc sez: %p\n", junkptr);
539 				printf("add_fsevent: event_zone info: %d 0x%x\n", ((int *)event_zone)[0], ((int *)event_zone)[1]);
540 				lock_watch_table();
541 				for (ii = 0; ii < MAX_WATCHERS; ii++) {
542 					if (watcher_table[ii] == NULL) {
543 						continue;
544 					}
545 
546 					printf("add_fsevent: watcher %s %p: rd %4d wr %4d q_size %4d flags 0x%x\n",
547 					    watcher_table[ii]->proc_name,
548 					    watcher_table[ii],
549 					    watcher_table[ii]->rd, watcher_table[ii]->wr,
550 					    watcher_table[ii]->eventq_size, watcher_table[ii]->flags);
551 				}
552 				unlock_watch_table();
553 
554 				last_print = current_tv;
555 				if (junkptr) {
556 					zfree(event_zone, junkptr);
557 				}
558 			}
559 		}
560 
561 		if (pathbuff) {
562 			release_pathbuff(pathbuff);
563 			pathbuff = NULL;
564 		}
565 		return ENOSPC;
566 	}
567 
568 	kfse_init(kfse, type, now, p);
569 	last_event_ptr = kfse;
570 	if (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE) {
571 		kfse_init(kfse_dest, type, now, p);
572 		kfse->regular_event.dest = kfse_dest;
573 	}
574 
575 	num_events_outstanding++;
576 	if (kfse->type == FSE_RENAME) {
577 		num_pending_rename++;
578 	}
579 	LIST_INSERT_HEAD(&kfse_list_head, kfse, kevent_list);
580 	OSBitOrAtomic16(KFSE_ON_LIST, &kfse->flags);
581 
582 	if (kfse->refcount < 1) {
583 		panic("add_fsevent: line %d: kfse recount %d but should be at least 1", __LINE__, kfse->refcount);
584 	}
585 
586 	unlock_fs_event_list(); // at this point it's safe to unlock
587 
588 	//
589 	// now process the arguments passed in and copy them into
590 	// the kfse
591 	//
592 
593 	cur = kfse;
594 
595 	if (type == FSE_DOCID_CREATED || type == FSE_DOCID_CHANGED) {
596 		//
597 		// These events are special and not like the other events.
598 		// They only have a dev_t, src inode #, dest inode #, and
599 		// a doc-id (va_arg'd to us in that order).  If we don't
600 		// get one of them, then the error-check filler will
601 		// catch it.
602 		//
603 		do_all_links = false;
604 		arg_type = va_arg(ap, int32_t);
605 		if (arg_type == FSE_ARG_DEV) {
606 			cur->docid_event.dev = (dev_t)(va_arg(ap, dev_t));
607 		}
608 
609 		arg_type = va_arg(ap, int32_t);
610 		if (arg_type == FSE_ARG_INO) {
611 			cur->docid_event.src_ino =
612 			    (ino64_t)(va_arg(ap, ino64_t));
613 		}
614 
615 		arg_type = va_arg(ap, int32_t);
616 		if (arg_type == FSE_ARG_INO) {
617 			cur->docid_event.dst_ino =
618 			    (ino64_t)(va_arg(ap, ino64_t));
619 		}
620 
621 		arg_type = va_arg(ap, int32_t);
622 		if (arg_type == FSE_ARG_INT32) {
623 			cur->docid_event.docid =
624 			    (uint64_t)va_arg(ap, uint32_t);
625 		} else if (arg_type == FSE_ARG_INT64) {
626 			cur->docid_event.docid =
627 			    (uint64_t)va_arg(ap, uint64_t);
628 		}
629 
630 		goto done_with_args;
631 	}
632 
633 	if (type == FSE_ACCESS_GRANTED) {
634 		//
635 		// This one is also different.  We get a path string
		// and (maybe) an audit token.  If we don't get the
		// audit token, we extract it from the vfs_context_t.
638 		//
639 		audit_token_t *atokenp = NULL;
640 		vnode_t vp = NULL;
641 		char *path_str = NULL;
642 		size_t path_strlen = 0;
643 		void *arg;
644 		int32_t len32;
645 
646 		do_all_links = false;
647 
648 		while ((arg_type = va_arg(ap, int32_t)) != FSE_ARG_DONE) {
649 			switch (arg_type) {
650 			case FSE_ARG_STRING:
651 				len32 = va_arg(ap, int32_t);
652 				arg = va_arg(ap, char *);
653 				if (path_str == NULL) {
654 					path_str = arg;
655 					path_strlen = len32;
656 				}
657 				break;
658 
659 			case FSE_ARG_PATH:
660 				arg = va_arg(ap, char *);
661 				if (path_str == NULL) {
662 					path_str = arg;
663 				}
664 				break;
665 
666 			case FSE_ARG_VNODE:
667 				arg = va_arg(ap, vnode_t);
668 				if (vp == NULL) {
669 					vp = arg;
670 				}
671 				break;
672 
673 			case FSE_ARG_AUDIT_TOKEN:
674 				arg = va_arg(ap, audit_token_t *);
675 				if (atokenp == NULL) {
676 					atokenp = arg;
677 				}
678 				break;
679 
680 			default:
681 				printf("add_fsevent: FSE_ACCESS_GRANTED unknown type %d\n", arg_type);
682 				// just skip one 32-bit word and hope we
683 				// sync up...
684 				(void)va_arg(ap, int32_t);
685 			}
686 		}
687 
688 		if (atokenp != NULL) {
689 			memcpy(&cur->access_granted_event.audit_token,
690 			    atokenp,
691 			    sizeof(cur->access_granted_event.audit_token));
692 		} else if (vfs_context_copy_audit_token(ctx,
693 		    &cur->access_granted_event.audit_token) != 0) {
694 			OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
695 			    &cur->flags);
696 			goto done_with_args;
697 		}
698 
699 		//
700 		// If we got FSE_ARG_STRING, the length includes the
701 		// terminating NUL.  If we got FSE_ARG_PATH, all we
702 		// got was the string pointer, so get the length and
703 		// adjust.  If we didn't get either, then the caller
704 		// needs to have provided us with a vnode, and with
705 		// that we can get the path.
706 		//
707 		if (path_str != NULL) {
708 			if (path_strlen == 0) {
709 				path_strlen = strlen(path_str) + 1;
710 			}
711 		} else if (vp != NULL) {
712 			pathbuff = get_pathbuff();
713 			pathbuff_len = MAXPATHLEN;
714 			pathbuff[0] = '\0';
715 			if (vn_getpath_no_firmlink(vp, pathbuff,
716 			    &pathbuff_len) == 0) {
717 				path_str = pathbuff;
718 				path_strlen = pathbuff_len;
719 			}
720 		}
721 
722 		if (path_str != NULL) {
723 			assert(path_strlen <= INT16_MAX);
724 			cur->access_granted_event.str =
725 			    vfs_addname(path_str, (uint32_t)path_strlen, 0, 0);
726 			if (path_str == pathbuff) {
727 				release_pathbuff(pathbuff);
728 				pathbuff = NULL;
729 			}
730 		}
731 		if (cur->access_granted_event.str == NULL) {
732 			OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
733 			    &cur->flags);
734 		}
735 
736 		goto done_with_args;
737 	}
738 
739 	if (type == FSE_UNMOUNT_PENDING) {
740 		// Just a dev_t
741 		// We use the same fields as the regular event, but we
742 		// don't have all of the data.
743 		do_all_links = false;
744 
745 		arg_type = va_arg(ap, int32_t);
746 		if (arg_type == FSE_ARG_DEV) {
747 			cur->regular_event.dev = (dev_t)(va_arg(ap, dev_t));
748 		}
749 
750 		cur->regular_event.dest = NULL;
751 		cur->regular_event.str = NULL;
752 		cur->regular_event.len = 0;
753 
754 		goto done_with_args;
755 	}
756 
757 	for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
758 		switch (arg_type) {
759 		case FSE_ARG_VNODE: {
760 			// this expands out into multiple arguments to the client
761 			struct vnode *vp;
762 			struct vnode_attr va;
763 
764 			if (kfse->regular_event.str != NULL) {
765 				cur = kfse_dest;
766 			}
767 
768 			vp = va_arg(ap, struct vnode *);
769 			if (vp == NULL) {
770 				panic("add_fsevent: you can't pass me a NULL vnode ptr (type %d)!",
771 				    cur->type);
772 			}
773 
774 			VATTR_INIT(&va);
775 			VATTR_WANTED(&va, va_fsid);
776 			VATTR_WANTED(&va, va_fileid);
777 			VATTR_WANTED(&va, va_mode);
778 			VATTR_WANTED(&va, va_uid);
779 			VATTR_WANTED(&va, va_document_id);
780 			VATTR_WANTED(&va, va_nlink);
781 			if ((ret = vnode_getattr(vp, &va, vfs_context_kernel())) != 0) {
782 				// printf("add_fsevent: failed to getattr on vp %p (%d)\n", cur->fref.vp, ret);
783 				cur->regular_event.str = NULL;
784 				error = EINVAL;
785 				goto clean_up;
786 			}
787 
788 			cur->regular_event.dev  = dev = (dev_t)va.va_fsid;
789 			cur->regular_event.ino  = (ino64_t)va.va_fileid;
790 			cur->regular_event.mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | va.va_mode;
791 			cur->regular_event.uid  = va.va_uid;
792 			cur->regular_event.document_id  = va.va_document_id;
793 			if (vp->v_flag & VISHARDLINK) {
794 				cur->regular_event.mode |= FSE_MODE_HLINK;
795 				if ((vp->v_type == VDIR && va.va_dirlinkcount == 0) || (vp->v_type == VREG && va.va_nlink == 0)) {
796 					cur->regular_event.mode |= FSE_MODE_LAST_HLINK;
797 				}
798 				if (orig_linkid == 0) {
799 					orig_linkid = cur->regular_event.ino;
800 					orig_linkcount = MIN(va.va_nlink, MAX_HARDLINK_NOTIFICATIONS);
801 					link_vp = vp;
802 					if (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID && !link_name) {
803 						VATTR_INIT(&va);
804 						VATTR_WANTED(&va, va_parentid);
805 						VATTR_WANTED(&va, va_name);
806 						link_name = zalloc(ZV_NAMEI);
807 						va.va_name = link_name;
808 						if ((ret = vnode_getattr(vp, &va, vfs_context_kernel()) != 0) ||
809 						    !(VATTR_IS_SUPPORTED(&va, va_name)) ||
810 						    !(VATTR_IS_SUPPORTED(&va, va_parentid))) {
811 							zfree(ZV_NAMEI, link_name);
812 							link_name = NULL;
813 						}
814 						if (link_name) {
815 							link_parentid = va.va_parentid;
816 						}
817 						va.va_name = NULL;
818 					}
819 				}
820 			}
821 
822 			// if we haven't gotten the path yet, get it.
823 			if (pathbuff == NULL && path_override == NULL) {
824 				pathbuff = get_pathbuff();
825 				pathbuff_len = MAXPATHLEN;
826 
827 				pathbuff[0] = '\0';
828 				if ((ret = vn_getpath_no_firmlink(vp, pathbuff, &pathbuff_len)) != 0 || pathbuff[0] == '\0') {
829 					OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
830 					    &cur->flags);
831 
832 					do {
833 						if (vp->v_parent != NULL) {
834 							vp = vp->v_parent;
835 						} else if (vp->v_mount) {
836 							strlcpy(pathbuff, vp->v_mount->mnt_vfsstat.f_mntonname, MAXPATHLEN);
837 							break;
838 						} else {
839 							vp = NULL;
840 						}
841 
842 						if (vp == NULL) {
843 							break;
844 						}
845 
846 						pathbuff_len = MAXPATHLEN;
847 						ret = vn_getpath_no_firmlink(vp, pathbuff, &pathbuff_len);
848 					} while (ret == ENOSPC);
849 
850 					if (ret != 0 || vp == NULL) {
851 						error = ENOENT;
852 						goto clean_up;
853 					}
854 				}
855 			} else if (path_override) {
856 				pathbuff = path_override;
857 				pathbuff_len = (int)strlen(path_override) + 1;
858 			} else {
859 				strlcpy(pathbuff, "NOPATH", MAXPATHLEN);
860 				pathbuff_len = (int)strlen(pathbuff) + 1;
861 			}
862 
863 			// store the path by adding it to the global string table
864 			cur->regular_event.len = (u_int16_t)pathbuff_len;
865 			cur->regular_event.str =
866 			    vfs_addname(pathbuff, pathbuff_len, 0, 0);
867 			if (cur->regular_event.str == NULL ||
868 			    cur->regular_event.str[0] == '\0') {
869 				panic("add_fsevent: was not able to add path %s to event %p.", pathbuff, cur);
870 			}
871 
872 			if (pathbuff != path_override) {
873 				release_pathbuff(pathbuff);
874 			}
875 			pathbuff = NULL;
876 
877 			break;
878 		}
879 
880 		case FSE_ARG_FINFO: {
881 			fse_info *fse;
882 
883 			fse = va_arg(ap, fse_info *);
884 
885 			cur->regular_event.dev  = dev = (dev_t)fse->dev;
886 			cur->regular_event.ino  = (ino64_t)fse->ino;
887 			cur->regular_event.mode = (int32_t)fse->mode;
888 			cur->regular_event.uid  = (uid_t)fse->uid;
889 			cur->regular_event.document_id  = (uint32_t)fse->document_id;
890 			// if it's a hard-link and this is the last link, flag it
891 			if (fse->mode & FSE_MODE_HLINK) {
892 				if (fse->nlink == 0) {
893 					cur->regular_event.mode |= FSE_MODE_LAST_HLINK;
894 				}
895 				if (orig_linkid == 0) {
896 					orig_linkid = cur->regular_event.ino;
897 					orig_linkcount = MIN(fse->nlink, MAX_HARDLINK_NOTIFICATIONS);
898 				}
899 			}
900 			if (cur->regular_event.mode & FSE_TRUNCATED_PATH) {
901 				OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
902 				    &cur->flags);
903 				cur->regular_event.mode &= ~FSE_TRUNCATED_PATH;
904 			}
905 			break;
906 		}
907 
908 		case FSE_ARG_STRING:
909 			if (kfse->regular_event.str != NULL) {
910 				cur = kfse_dest;
911 			}
912 
913 			cur->regular_event.len =
914 			    (int16_t)(va_arg(ap, int32_t) & 0x7fff);
915 			if (cur->regular_event.len >= 1) {
916 				cur->regular_event.str =
917 				    vfs_addname(va_arg(ap, char *),
918 				    cur->regular_event.len, 0, 0);
919 			} else {
920 				printf("add_fsevent: funny looking string length: %d\n", (int)cur->regular_event.len);
921 				cur->regular_event.len = 2;
922 				cur->regular_event.str = vfs_addname("/",
923 				    cur->regular_event.len, 0, 0);
924 			}
925 			if (cur->regular_event.str[0] == 0) {
926 				printf("add_fsevent: bogus looking string (len %d)\n", cur->regular_event.len);
927 			}
928 			break;
929 
930 		case FSE_ARG_INT32: {
931 			uint32_t ival = (uint32_t)va_arg(ap, int32_t);
932 			kfse->regular_event.uid = ival;
933 			break;
934 		}
935 
936 		default:
937 			printf("add_fsevent: unknown type %d\n", arg_type);
938 			// just skip one 32-bit word and hope we sync up...
939 			(void)va_arg(ap, int32_t);
940 		}
941 	}
942 
943 done_with_args:
944 	va_end(ap);
945 
946 	// XXX Memory barrier here?
947 	if (kfse_dest) {
948 		OSBitAndAtomic16(~KFSE_BEING_CREATED, &kfse_dest->flags);
949 	}
950 	OSBitAndAtomic16(~KFSE_BEING_CREATED, &kfse->flags);
951 
952 	//
953 	// now we have to go and let everyone know that
954 	// is interested in this type of event
955 	//
956 	lock_watch_table();
957 
958 	for (i = 0; i < MAX_WATCHERS; i++) {
959 		watcher = watcher_table[i];
960 		if (watcher == NULL) {
961 			continue;
962 		}
963 
964 		if (type < watcher->num_events
965 		    && watcher->event_list[type] == FSE_REPORT
966 		    && watcher_cares_about_dev(watcher, dev)) {
967 			if (watcher_add_event(watcher, kfse) != 0) {
968 				watcher->num_dropped++;
969 				continue;
970 			}
971 		}
972 
973 		// if (kfse->refcount < 1) {
974 		//    panic("add_fsevent: line %d: kfse recount %d but should be at least 1", __LINE__, kfse->refcount);
975 		// }
976 	}
977 
978 	unlock_watch_table();
979 
980 clean_up:
981 
982 	if (pathbuff) {
983 		release_pathbuff(pathbuff);
984 		pathbuff = NULL;
985 	}
986 	// replicate events for sibling hardlinks
987 	if (do_all_links &&
988 	    (kfse->regular_event.mode & FSE_MODE_HLINK) &&
989 	    !(kfse->regular_event.mode & FSE_MODE_LAST_HLINK) &&
990 	    (type == FSE_STAT_CHANGED ||
991 	    type == FSE_CONTENT_MODIFIED ||
992 	    type == FSE_FINDER_INFO_CHANGED ||
993 	    type == FSE_XATTR_MODIFIED)) {
994 		if (orig_linkcount > 0 && orig_linkid != 0) {
995 #ifndef APFSIOC_NEXT_LINK
996 #define APFSIOC_NEXT_LINK  _IOWR('J', 10, uint64_t)
997 #endif
998 			if (path_override == NULL) {
999 				path_override = get_pathbuff();
1000 			}
1001 			if (next_linkid == 0) {
1002 				next_linkid = orig_linkid;
1003 			}
1004 
1005 			if (link_vp) {
1006 				mount_t mp = NULL;
1007 				vnode_t mnt_rootvp = NULL;
1008 				int iret = -1;
1009 
1010 				mp = vnode_mount(link_vp);
1011 				if (mp) {
1012 					iret = VFS_ROOT(mp, &mnt_rootvp, vfs_context_kernel());
1013 				}
1014 
1015 				if (iret == 0 && mnt_rootvp) {
1016 					iret = VNOP_IOCTL(mnt_rootvp, APFSIOC_NEXT_LINK, (char *)&next_linkid, (int)0, vfs_context_kernel());
1017 					vnode_put(mnt_rootvp);
1018 				}
1019 
1020 				int32_t fsid0;
1021 				int path_override_len = MAXPATHLEN;
1022 
1023 				// continue resolving hardlink paths if there is a valid next_linkid retrieved
1024 				// file systems not supporting APFSIOC_NEXT_LINK will skip replicating events for sibling hardlinks
1025 				if (iret == 0 && next_linkid != 0) {
1026 					fsid0 = link_vp->v_mount->mnt_vfsstat.f_fsid.val[0];
1027 					ut->uu_flag |= UT_KERN_RAGE_VNODES;
1028 					if (!do_cache_reset) {
1029 						do_cache_reset = true;
1030 					}
1031 					if ((iret = fsgetpath_internal(ctx, fsid0, next_linkid, MAXPATHLEN, path_override, FSOPT_NOFIRMLINKPATH, &path_override_len)) == 0) {
1032 						orig_linkcount--;
1033 						ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1034 
1035 						if (orig_linkcount >= 0) {
1036 							release_event_ref(kfse);
1037 							goto restart;
1038 						}
1039 					} else {
1040 						// failed to get override path
1041 						// encountered a broken link or the linkid has been deleted before retrieving the path
1042 						orig_linkcount--;
1043 						ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1044 
1045 						if (orig_linkcount >= 0) {
1046 							goto clean_up;
1047 						}
1048 					}
1049 				}
1050 			}
1051 		}
1052 	}
1053 
1054 	if (link_name) {
1055 		/*
1056 		 * If we call fsgetpath on all the links, it will set the link origin cache
1057 		 * to the last link that the path was obtained for.
1058 		 * To restore the the original link id cache in APFS we need to issue a
1059 		 * lookup on the original directory + name for the link.
1060 		 */
1061 		if (do_cache_reset) {
1062 			vnode_t dvp = NULLVP;
1063 
1064 			if ((ret = VFS_VGET(link_vp->v_mount, (ino64_t)link_parentid, &dvp, vfs_context_kernel())) == 0) {
1065 				vnode_t lvp = NULLVP;
1066 
1067 				ret = vnode_lookupat(link_name, 0, &lvp, ctx, dvp);
1068 				if (!ret) {
1069 					vnode_put(lvp);
1070 					lvp = NULLVP;
1071 				}
1072 				vnode_put(dvp);
1073 				dvp = NULLVP;
1074 			}
1075 			ret = 0;
1076 		}
1077 		zfree(ZV_NAMEI, link_name);
1078 		link_name = NULL;
1079 	}
1080 
1081 	if (path_override) {
1082 		release_pathbuff(path_override);
1083 		path_override = NULL;
1084 	}
1085 
1086 	release_event_ref(kfse);
1087 
1088 	return error;
1089 }
1090 
1091 int
test_fse_access_granted(vnode_t vp,unsigned long type,vfs_context_t ctx)1092 test_fse_access_granted(vnode_t vp, unsigned long type, vfs_context_t ctx)
1093 {
1094 	audit_token_t atoken;
1095 	char *pathbuff;
1096 	int error, pathbuff_len;
1097 
1098 	if (type == 0) {
1099 		return add_fsevent(FSE_ACCESS_GRANTED, ctx,
1100 		           FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1101 	}
1102 
1103 	if (type == 1) {
1104 		error = vfs_context_copy_audit_token(ctx, &atoken);
1105 		if (error) {
1106 			return error;
1107 		}
1108 		return add_fsevent(FSE_ACCESS_GRANTED, ctx,
1109 		           FSE_ARG_VNODE, vp, FSE_ARG_AUDIT_TOKEN, &atoken,
1110 		           FSE_ARG_DONE);
1111 	}
1112 
1113 	if (type == 2 || type == 3) {
1114 		pathbuff = get_pathbuff();
1115 		pathbuff_len = MAXPATHLEN;
1116 		pathbuff[0] = '\0';
1117 		error = vn_getpath_no_firmlink(vp, pathbuff, &pathbuff_len);
1118 		if (error) {
1119 			release_pathbuff(pathbuff);
1120 			return error;
1121 		}
1122 		if (type == 2) {
1123 			error = add_fsevent(FSE_ACCESS_GRANTED, ctx,
1124 			    FSE_ARG_STRING, pathbuff_len, pathbuff,
1125 			    FSE_ARG_DONE);
1126 		} else {
1127 			error = add_fsevent(FSE_ACCESS_GRANTED, ctx,
1128 			    FSE_ARG_PATH, pathbuff, FSE_ARG_DONE);
1129 		}
1130 		release_pathbuff(pathbuff);
1131 		return error;
1132 	}
1133 
1134 	return ENOTSUP;
1135 }
1136 
1137 static void
release_event_ref(kfs_event * kfse)1138 release_event_ref(kfs_event *kfse)
1139 {
1140 	int old_refcount;
1141 	kfs_event *dest = NULL;
1142 	const char *path_str = NULL, *dest_path_str = NULL;
1143 
1144 	lock_fs_event_list();
1145 
1146 	old_refcount = OSAddAtomic(-1, &kfse->refcount);
1147 	if (old_refcount > 1) {
1148 		unlock_fs_event_list();
1149 		return;
1150 	}
1151 
1152 	if (last_event_ptr == kfse) {
1153 		last_event_ptr = NULL;
1154 		last_event_type = -1;
1155 		last_coalesced_time = 0;
1156 	}
1157 
1158 	if (kfse->refcount < 0) {
1159 		panic("release_event_ref: bogus kfse refcount %d", kfse->refcount);
1160 	}
1161 
1162 	assert(kfse->refcount == 0);
1163 	assert(kfse->type != FSE_INVALID);
1164 
1165 	//
1166 	// Get pointers to all the things so we can free without
1167 	// holding any locks.
1168 	//
1169 	if (kfse->type != FSE_DOCID_CREATED &&
1170 	    kfse->type != FSE_DOCID_CHANGED &&
1171 	    kfse->type != FSE_ACCESS_GRANTED) {
1172 		path_str = kfse->regular_event.str;
1173 
1174 		dest = kfse->regular_event.dest;
1175 		if (dest != NULL) {
1176 			assert(dest->type != FSE_INVALID);
1177 			if (OSAddAtomic(-1,
1178 			    &kfse->regular_event.dest->refcount) == 1) {
1179 				dest_path_str = dest->regular_event.str;
1180 			} else {
1181 				dest = NULL;
1182 			}
1183 		}
1184 	}
1185 
1186 	if (dest != NULL) {
1187 		if (dest->flags & KFSE_ON_LIST) {
1188 			num_events_outstanding--;
1189 			LIST_REMOVE(dest, kevent_list);
1190 		}
1191 	}
1192 
1193 	if (kfse->flags & KFSE_ON_LIST) {
1194 		num_events_outstanding--;
1195 		LIST_REMOVE(kfse, kevent_list);
1196 		if (kfse->type == FSE_RENAME) {
1197 			num_pending_rename--;
1198 		}
1199 	}
1200 
1201 	unlock_fs_event_list();
1202 
1203 	zfree(event_zone, kfse);
1204 	if (dest != NULL) {
1205 		zfree(event_zone, dest);
1206 	}
1207 
1208 	if (path_str != NULL) {
1209 		vfs_removename(path_str);
1210 	}
1211 	if (dest_path_str != NULL) {
1212 		vfs_removename(dest_path_str);
1213 	}
1214 }
1215 
// Entitlement string tested by watcher_is_entitled().
#define FSEVENTS_WATCHER_ENTITLEMENT            \
	"com.apple.private.vfs.fsevents-watcher"

//
// Entitlement required to receive FSE_ACCESS_GRANTED events.
//
// We restrict this for two reasons:
//
// 1- So that naive processes don't get this firehose by default.
//
// 2- Because this event, when delivered to watchers, includes the
//    audit token of the process granted the access, and we don't
//    want to leak that to random watchers.
//
#define FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT \
	"com.apple.private.vfs.fsevents-access-granted-watcher"
1230 
1231 static bool
watcher_is_entitled(task_t task)1232 watcher_is_entitled(task_t task)
1233 {
1234 	//
1235 	// We consider a process to be entitled to watch /dev/fsevents
1236 	// if it has either FSEVENTS_WATCHER_ENTITLEMENT or
1237 	// FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT.
1238 	//
1239 	return !!(IOTaskHasEntitlement(task, FSEVENTS_WATCHER_ENTITLEMENT) ||
1240 	       IOTaskHasEntitlement(task,
1241 	       FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT));
1242 }
1243 
1244 static bool
watcher_is_entitled_for_access_granted(task_t task)1245 watcher_is_entitled_for_access_granted(task_t task)
1246 {
1247 	return !!IOTaskHasEntitlement(task,
1248 	           FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT);
1249 }
1250 
1251 static int
add_watcher(int8_t * event_list,int32_t num_events,int32_t eventq_size,fs_event_watcher ** watcher_out,void * fseh)1252 add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_event_watcher **watcher_out, void *fseh)
1253 {
1254 	int               i;
1255 	fs_event_watcher *watcher;
1256 
1257 	if (eventq_size <= 0 || eventq_size > 100 * max_kfs_events) {
1258 		eventq_size = max_kfs_events;
1259 	}
1260 
1261 	// If the watcher wants FSE_ACCESS_GRANTED, ensure it has the
1262 	// correct entitlement.  If not, just silently drop that event.
1263 	if (num_events > FSE_ACCESS_GRANTED &&
1264 	    event_list[FSE_ACCESS_GRANTED] != FSE_IGNORE &&
1265 	    !watcher_is_entitled_for_access_granted(current_task())) {
1266 		event_list[FSE_ACCESS_GRANTED] = FSE_IGNORE;
1267 	}
1268 
1269 	// Note: the event_queue follows the fs_event_watcher struct
1270 	//       in memory so we only have to do one allocation
1271 	watcher = kalloc_type(fs_event_watcher, kfs_event *, eventq_size, Z_WAITOK);
1272 	if (watcher == NULL) {
1273 		return ENOMEM;
1274 	}
1275 
1276 	watcher->event_list   = event_list;
1277 	watcher->num_events   = num_events;
1278 	watcher->devices_not_to_watch = NULL;
1279 	watcher->num_devices  = 0;
1280 	watcher->flags        = 0;
1281 	watcher->event_queue  = (kfs_event **)&watcher[1];
1282 	watcher->eventq_size  = eventq_size;
1283 	watcher->rd           = 0;
1284 	watcher->wr           = 0;
1285 	watcher->blockers     = 0;
1286 	watcher->num_readers  = 0;
1287 	watcher->max_event_id = 0;
1288 	watcher->fseh         = fseh;
1289 	watcher->pid          = proc_selfpid();
1290 	proc_selfname(watcher->proc_name, sizeof(watcher->proc_name));
1291 
1292 	watcher->num_dropped  = 0;  // XXXdbg - debugging
1293 
1294 	if (watcher_is_entitled(current_task())) {
1295 		watcher->flags |= WATCHER_APPLE_SYSTEM_SERVICE;
1296 	} else {
1297 		printf("fsevents: watcher %s (pid: %d) - Using /dev/fsevents directly is unsupported.  Migrate to FSEventsFramework\n",
1298 		    watcher->proc_name, watcher->pid);
1299 	}
1300 
1301 	lock_watch_table();
1302 
1303 	// find a slot for the new watcher
1304 	for (i = 0; i < MAX_WATCHERS; i++) {
1305 		if (watcher_table[i] == NULL) {
1306 			watcher->my_id   = i;
1307 			watcher_table[i] = watcher;
1308 			break;
1309 		}
1310 	}
1311 
1312 	if (i >= MAX_WATCHERS) {
1313 		printf("fsevents: too many watchers!\n");
1314 		unlock_watch_table();
1315 		kfree_type(fs_event_watcher, kfs_event *, watcher->eventq_size, watcher);
1316 		return ENOSPC;
1317 	}
1318 
1319 	// now update the global list of who's interested in
1320 	// events of a particular type...
1321 	for (i = 0; i < num_events; i++) {
1322 		if (event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1323 			fs_event_type_watchers[i]++;
1324 		}
1325 	}
1326 
1327 	unlock_watch_table();
1328 
1329 	*watcher_out = watcher;
1330 
1331 	return 0;
1332 }
1333 
1334 
1335 
1336 static void
remove_watcher(fs_event_watcher * target)1337 remove_watcher(fs_event_watcher *target)
1338 {
1339 	int i, j, counter = 0;
1340 	fs_event_watcher *watcher;
1341 	kfs_event *kfse;
1342 
1343 	lock_watch_table();
1344 
1345 	for (j = 0; j < MAX_WATCHERS; j++) {
1346 		watcher = watcher_table[j];
1347 		if (watcher != target) {
1348 			continue;
1349 		}
1350 
1351 		watcher_table[j] = NULL;
1352 
1353 		for (i = 0; i < watcher->num_events; i++) {
1354 			if (watcher->event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1355 				fs_event_type_watchers[i]--;
1356 			}
1357 		}
1358 
1359 		if (watcher->flags & WATCHER_CLOSING) {
1360 			unlock_watch_table();
1361 			return;
1362 		}
1363 
1364 		// printf("fsevents: removing watcher %p (rd %d wr %d num_readers %d flags 0x%x)\n", watcher, watcher->rd, watcher->wr, watcher->num_readers, watcher->flags);
1365 		watcher->flags |= WATCHER_CLOSING;
1366 		OSAddAtomic(1, &watcher->num_readers);
1367 
1368 		unlock_watch_table();
1369 
1370 		while (watcher->num_readers > 1 && counter++ < 5000) {
1371 			lock_watch_table();
1372 			fsevents_wakeup(watcher); // in case they're asleep
1373 			unlock_watch_table();
1374 
1375 			tsleep(watcher, PRIBIO, "fsevents-close", 1);
1376 		}
1377 		if (counter++ >= 5000) {
1378 			// printf("fsevents: close: still have readers! (%d)\n", watcher->num_readers);
1379 			panic("fsevents: close: still have readers! (%d)", watcher->num_readers);
1380 		}
1381 
1382 		// drain the event_queue
1383 
1384 		lck_rw_lock_exclusive(&event_handling_lock);
1385 		while (watcher->rd != watcher->wr) {
1386 			kfse = watcher->event_queue[watcher->rd];
1387 			watcher->event_queue[watcher->rd] = NULL;
1388 			watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1389 			OSSynchronizeIO();
1390 			if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1391 				release_event_ref(kfse);
1392 			}
1393 		}
1394 		lck_rw_unlock_exclusive(&event_handling_lock);
1395 
1396 		kfree_data(watcher->event_list, watcher->num_events * sizeof(int8_t));
1397 		kfree_data(watcher->devices_not_to_watch, watcher->num_devices * sizeof(dev_t));
1398 		kfree_type(fs_event_watcher, kfs_event *, watcher->eventq_size, watcher);
1399 		return;
1400 	}
1401 
1402 	unlock_watch_table();
1403 }
1404 
1405 
1406 #define EVENT_DELAY_IN_MS   10
1407 static thread_call_t event_delivery_timer = NULL;
1408 static int timer_set = 0;
1409 
1410 
1411 static void
delayed_event_delivery(__unused void * param0,__unused void * param1)1412 delayed_event_delivery(__unused void *param0, __unused void *param1)
1413 {
1414 	int i;
1415 
1416 	lock_watch_table();
1417 
1418 	for (i = 0; i < MAX_WATCHERS; i++) {
1419 		if (watcher_table[i] != NULL && watcher_table[i]->rd != watcher_table[i]->wr) {
1420 			fsevents_wakeup(watcher_table[i]);
1421 		}
1422 	}
1423 
1424 	timer_set = 0;
1425 
1426 	unlock_watch_table();
1427 }
1428 
1429 
1430 //
1431 // The watch table must be locked before calling this function.
1432 //
1433 static void
schedule_event_wakeup(void)1434 schedule_event_wakeup(void)
1435 {
1436 	uint64_t deadline;
1437 
1438 	if (event_delivery_timer == NULL) {
1439 		event_delivery_timer = thread_call_allocate((thread_call_func_t)delayed_event_delivery, NULL);
1440 	}
1441 
1442 	clock_interval_to_deadline(EVENT_DELAY_IN_MS, 1000 * 1000, &deadline);
1443 
1444 	thread_call_enter_delayed(event_delivery_timer, deadline);
1445 	timer_set = 1;
1446 }
1447 
1448 
1449 
1450 #define MAX_NUM_PENDING  16
1451 
1452 //
1453 // NOTE: the watch table must be locked before calling
1454 //       this routine.
1455 //
1456 static int
watcher_add_event(fs_event_watcher * watcher,kfs_event * kfse)1457 watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse)
1458 {
1459 	if (kfse->abstime > watcher->max_event_id) {
1460 		watcher->max_event_id = kfse->abstime;
1461 	}
1462 
1463 	if (((watcher->wr + 1) % watcher->eventq_size) == watcher->rd) {
1464 		watcher->flags |= WATCHER_DROPPED_EVENTS;
1465 		fsevents_wakeup(watcher);
1466 		return ENOSPC;
1467 	}
1468 
1469 	OSAddAtomic(1, &kfse->refcount);
1470 	watcher->event_queue[watcher->wr] = kfse;
1471 	OSSynchronizeIO();
1472 	watcher->wr = (watcher->wr + 1) % watcher->eventq_size;
1473 
1474 	//
1475 	// wake up the watcher if there are more than MAX_NUM_PENDING events.
1476 	// otherwise schedule a timer (if one isn't already set) which will
1477 	// send any pending events if no more are received in the next
1478 	// EVENT_DELAY_IN_MS milli-seconds.
1479 	//
1480 	int32_t num_pending = 0;
1481 	if (watcher->rd < watcher->wr) {
1482 		num_pending = watcher->wr - watcher->rd;
1483 	}
1484 
1485 	if (watcher->rd > watcher->wr) {
1486 		num_pending = watcher->wr + watcher->eventq_size - watcher->rd;
1487 	}
1488 
1489 	if (num_pending > (watcher->eventq_size * 3 / 4) && !(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE)) {
1490 		/* Non-Apple Service is falling behind, start dropping events for this process */
1491 		lck_rw_lock_exclusive(&event_handling_lock);
1492 		while (watcher->rd != watcher->wr) {
1493 			kfse = watcher->event_queue[watcher->rd];
1494 			watcher->event_queue[watcher->rd] = NULL;
1495 			watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1496 			OSSynchronizeIO();
1497 			if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1498 				release_event_ref(kfse);
1499 			}
1500 		}
1501 		watcher->flags |= WATCHER_DROPPED_EVENTS;
1502 		lck_rw_unlock_exclusive(&event_handling_lock);
1503 
1504 		printf("fsevents: watcher falling behind: %s (pid: %d) rd: %4d wr: %4d q_size: %4d flags: 0x%x\n",
1505 		    watcher->proc_name, watcher->pid, watcher->rd, watcher->wr,
1506 		    watcher->eventq_size, watcher->flags);
1507 
1508 		fsevents_wakeup(watcher);
1509 	} else if (num_pending > MAX_NUM_PENDING) {
1510 		fsevents_wakeup(watcher);
1511 	} else if (timer_set == 0) {
1512 		schedule_event_wakeup();
1513 	}
1514 
1515 	return 0;
1516 }
1517 
1518 static int
fill_buff(uint16_t type,int32_t size,const void * data,char * buff,int32_t * _buff_idx,int32_t buff_sz,struct uio * uio)1519 fill_buff(uint16_t type, int32_t size, const void *data,
1520     char *buff, int32_t *_buff_idx, int32_t buff_sz,
1521     struct uio *uio)
1522 {
1523 	int32_t amt, error = 0, buff_idx = *_buff_idx;
1524 	uint16_t tmp;
1525 
1526 	//
1527 	// the +1 on the size is to guarantee that the main data
1528 	// copy loop will always copy at least 1 byte
1529 	//
1530 	if ((buff_sz - buff_idx) <= (int)(2 * sizeof(uint16_t) + 1)) {
1531 		if (buff_idx > uio_resid(uio)) {
1532 			error = ENOSPC;
1533 			goto get_out;
1534 		}
1535 
1536 		error = uiomove(buff, buff_idx, uio);
1537 		if (error) {
1538 			goto get_out;
1539 		}
1540 		buff_idx = 0;
1541 	}
1542 
1543 	// copy out the header (type & size)
1544 	memcpy(&buff[buff_idx], &type, sizeof(uint16_t));
1545 	buff_idx += sizeof(uint16_t);
1546 
1547 	tmp = size & 0xffff;
1548 	memcpy(&buff[buff_idx], &tmp, sizeof(uint16_t));
1549 	buff_idx += sizeof(uint16_t);
1550 
1551 	// now copy the body of the data, flushing along the way
1552 	// if the buffer fills up.
1553 	//
1554 	while (size > 0) {
1555 		amt = (size < (buff_sz - buff_idx)) ? size : (buff_sz - buff_idx);
1556 		memcpy(&buff[buff_idx], data, amt);
1557 
1558 		size -= amt;
1559 		buff_idx += amt;
1560 		data = (const char *)data + amt;
1561 		if (size > (buff_sz - buff_idx)) {
1562 			if (buff_idx > uio_resid(uio)) {
1563 				error = ENOSPC;
1564 				goto get_out;
1565 			}
1566 			error = uiomove(buff, buff_idx, uio);
1567 			if (error) {
1568 				goto get_out;
1569 			}
1570 			buff_idx = 0;
1571 		}
1572 
1573 		if (amt == 0) { // just in case...
1574 			break;
1575 		}
1576 	}
1577 
1578 get_out:
1579 	*_buff_idx = buff_idx;
1580 
1581 	return error;
1582 }
1583 
1584 
1585 static int copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio)  __attribute__((noinline));
1586 
1587 static int
copy_out_kfse(fs_event_watcher * watcher,kfs_event * kfse,struct uio * uio)1588 copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio)
1589 {
1590 	int      error;
1591 	uint16_t tmp16;
1592 	int32_t  type;
1593 	kfs_event *cur;
1594 	char     evbuff[512];
1595 	int      evbuff_idx = 0;
1596 
1597 	if (kfse->type == FSE_INVALID) {
1598 		panic("fsevents: copy_out_kfse: asked to copy out an invalid event (kfse %p, refcount %d)", kfse, kfse->refcount);
1599 	}
1600 
1601 	if (kfse->flags & KFSE_BEING_CREATED) {
1602 		return 0;
1603 	}
1604 
1605 	if (((kfse->type == FSE_RENAME) || (kfse->type == FSE_CLONE)) &&
1606 	    kfse->regular_event.dest == NULL) {
1607 		//
1608 		// This can happen if an event gets recycled but we had a
1609 		// pointer to it in our event queue.  The event is the
1610 		// destination of a rename or clone which we'll process
1611 		// separately (that is, another kfse points to this one
1612 		// so it's ok to skip this guy because we'll process it
1613 		// when we process the other one)
1614 		error = 0;
1615 		goto get_out;
1616 	}
1617 
1618 	if (watcher->flags & WATCHER_WANTS_EXTENDED_INFO) {
1619 		type = (kfse->type & 0xfff);
1620 
1621 		if (kfse->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1622 			type |= (FSE_CONTAINS_DROPPED_EVENTS << FSE_FLAG_SHIFT);
1623 		} else if (kfse->flags & KFSE_COMBINED_EVENTS) {
1624 			type |= (FSE_COMBINED_EVENTS << FSE_FLAG_SHIFT);
1625 		}
1626 	} else {
1627 		type = (int32_t)kfse->type;
1628 	}
1629 
1630 	// copy out the type of the event
1631 	memcpy(evbuff, &type, sizeof(int32_t));
1632 	evbuff_idx += sizeof(int32_t);
1633 
1634 	// copy out the pid of the person that generated the event
1635 	memcpy(&evbuff[evbuff_idx], &kfse->pid, sizeof(pid_t));
1636 	evbuff_idx += sizeof(pid_t);
1637 
1638 	cur = kfse;
1639 
1640 copy_again:
1641 
1642 	if (kfse->type == FSE_DOCID_CHANGED ||
1643 	    kfse->type == FSE_DOCID_CREATED) {
1644 		dev_t    dev     = cur->docid_event.dev;
1645 		ino64_t  src_ino = cur->docid_event.src_ino;
1646 		ino64_t  dst_ino = cur->docid_event.dst_ino;
1647 		uint64_t docid   = cur->docid_event.docid;
1648 
1649 		error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev, evbuff,
1650 		    &evbuff_idx, sizeof(evbuff), uio);
1651 		if (error != 0) {
1652 			goto get_out;
1653 		}
1654 
1655 		error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &src_ino,
1656 		    evbuff, &evbuff_idx, sizeof(evbuff), uio);
1657 		if (error != 0) {
1658 			goto get_out;
1659 		}
1660 
1661 		error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &dst_ino,
1662 		    evbuff, &evbuff_idx, sizeof(evbuff), uio);
1663 		if (error != 0) {
1664 			goto get_out;
1665 		}
1666 
1667 		error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &docid,
1668 		    evbuff, &evbuff_idx, sizeof(evbuff), uio);
1669 		if (error != 0) {
1670 			goto get_out;
1671 		}
1672 
1673 		goto done;
1674 	}
1675 
1676 	if (kfse->type == FSE_UNMOUNT_PENDING) {
1677 		dev_t    dev  = cur->regular_event.dev;
1678 
1679 		error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev,
1680 		    evbuff, &evbuff_idx, sizeof(evbuff), uio);
1681 		if (error != 0) {
1682 			goto get_out;
1683 		}
1684 
1685 		goto done;
1686 	}
1687 
1688 	if (kfse->type == FSE_ACCESS_GRANTED) {
1689 		//
1690 		// KFSE_CONTAINS_DROPPED_EVENTS will be set if either
1691 		// the path or audit token are bogus; don't copy out
1692 		// either in that case.
1693 		//
1694 		if (cur->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1695 			goto done;
1696 		}
1697 		error = fill_buff(FSE_ARG_STRING,
1698 		    cur->access_granted_event.len,
1699 		    cur->access_granted_event.str,
1700 		    evbuff, &evbuff_idx, sizeof(evbuff), uio);
1701 		if (error != 0) {
1702 			goto get_out;
1703 		}
1704 		error = fill_buff(FSE_ARG_AUDIT_TOKEN,
1705 		    sizeof(cur->access_granted_event.audit_token),
1706 		    &cur->access_granted_event.audit_token,
1707 		    evbuff, &evbuff_idx, sizeof(evbuff), uio);
1708 		if (error != 0) {
1709 			goto get_out;
1710 		}
1711 
1712 		goto done;
1713 	}
1714 
1715 	if (cur->regular_event.str == NULL ||
1716 	    cur->regular_event.str[0] == '\0') {
1717 		printf("copy_out_kfse:2: empty/short path (%s)\n",
1718 		    cur->regular_event.str);
1719 		error = fill_buff(FSE_ARG_STRING, 2, "/", evbuff, &evbuff_idx,
1720 		    sizeof(evbuff), uio);
1721 	} else {
1722 		error = fill_buff(FSE_ARG_STRING, cur->regular_event.len,
1723 		    cur->regular_event.str, evbuff, &evbuff_idx,
1724 		    sizeof(evbuff), uio);
1725 	}
1726 	if (error != 0) {
1727 		goto get_out;
1728 	}
1729 
1730 	if (cur->regular_event.dev == 0 && cur->regular_event.ino == 0) {
1731 		// this happens when a rename event happens and the
1732 		// destination of the rename did not previously exist.
1733 		// it thus has no other file info so skip copying out
1734 		// the stuff below since it isn't initialized
1735 		goto done;
1736 	}
1737 
1738 
1739 	if (watcher->flags & WATCHER_WANTS_COMPACT_EVENTS) {
1740 		// We rely on the layout of the "regular_event"
1741 		// structure being the same as fse_info in order
1742 		// to speed up this copy.  The nlink field in
1743 		// fse_info is not included.
1744 		error = fill_buff(FSE_ARG_FINFO, KFSE_INFO_COPYSIZE,
1745 		    &cur->regular_event, evbuff, &evbuff_idx,
1746 		    sizeof(evbuff), uio);
1747 		if (error != 0) {
1748 			goto get_out;
1749 		}
1750 	} else {
1751 		error = fill_buff(FSE_ARG_DEV, sizeof(dev_t),
1752 		    &cur->regular_event.dev, evbuff, &evbuff_idx,
1753 		    sizeof(evbuff), uio);
1754 		if (error != 0) {
1755 			goto get_out;
1756 		}
1757 
1758 		error = fill_buff(FSE_ARG_INO, sizeof(ino64_t),
1759 		    &cur->regular_event.ino, evbuff, &evbuff_idx,
1760 		    sizeof(evbuff), uio);
1761 		if (error != 0) {
1762 			goto get_out;
1763 		}
1764 
1765 		error = fill_buff(FSE_ARG_MODE, sizeof(int32_t),
1766 		    &cur->regular_event.mode, evbuff, &evbuff_idx,
1767 		    sizeof(evbuff), uio);
1768 		if (error != 0) {
1769 			goto get_out;
1770 		}
1771 
1772 		error = fill_buff(FSE_ARG_UID, sizeof(uid_t),
1773 		    &cur->regular_event.uid, evbuff, &evbuff_idx,
1774 		    sizeof(evbuff), uio);
1775 		if (error != 0) {
1776 			goto get_out;
1777 		}
1778 
1779 		error = fill_buff(FSE_ARG_GID, sizeof(gid_t),
1780 		    &cur->regular_event.document_id, evbuff, &evbuff_idx,
1781 		    sizeof(evbuff), uio);
1782 		if (error != 0) {
1783 			goto get_out;
1784 		}
1785 	}
1786 
1787 	if (cur->regular_event.dest) {
1788 		cur = cur->regular_event.dest;
1789 		goto copy_again;
1790 	}
1791 
1792 done:
1793 	// very last thing: the time stamp
1794 	error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &cur->abstime,
1795 	    evbuff, &evbuff_idx, sizeof(evbuff), uio);
1796 	if (error != 0) {
1797 		goto get_out;
1798 	}
1799 
1800 	// check if the FSE_ARG_DONE will fit
1801 	if (sizeof(uint16_t) > sizeof(evbuff) - evbuff_idx) {
1802 		if (evbuff_idx > uio_resid(uio)) {
1803 			error = ENOSPC;
1804 			goto get_out;
1805 		}
1806 		error = uiomove(evbuff, evbuff_idx, uio);
1807 		if (error) {
1808 			goto get_out;
1809 		}
1810 		evbuff_idx = 0;
1811 	}
1812 
1813 	tmp16 = FSE_ARG_DONE;
1814 	memcpy(&evbuff[evbuff_idx], &tmp16, sizeof(uint16_t));
1815 	evbuff_idx += sizeof(uint16_t);
1816 
1817 	// flush any remaining data in the buffer (and hopefully
1818 	// in most cases this is the only uiomove we'll do)
1819 	if (evbuff_idx > uio_resid(uio)) {
1820 		error = ENOSPC;
1821 	} else {
1822 		error = uiomove(evbuff, evbuff_idx, uio);
1823 	}
1824 
1825 get_out:
1826 
1827 	return error;
1828 }
1829 
1830 
1831 
1832 static int
fmod_watch(fs_event_watcher * watcher,struct uio * uio)1833 fmod_watch(fs_event_watcher *watcher, struct uio *uio)
1834 {
1835 	int               error = 0;
1836 	user_ssize_t      last_full_event_resid;
1837 	kfs_event        *kfse;
1838 	uint16_t          tmp16;
1839 	int               skipped;
1840 
1841 	last_full_event_resid = uio_resid(uio);
1842 
1843 	// need at least 2048 bytes of space (maxpathlen + 1 event buf)
1844 	if (uio_resid(uio) < 2048 || watcher == NULL) {
1845 		return EINVAL;
1846 	}
1847 
1848 	if (watcher->flags & WATCHER_CLOSING) {
1849 		return 0;
1850 	}
1851 
1852 	if (OSAddAtomic(1, &watcher->num_readers) != 0) {
1853 		// don't allow multiple threads to read from the fd at the same time
1854 		OSAddAtomic(-1, &watcher->num_readers);
1855 		return EAGAIN;
1856 	}
1857 
1858 restart_watch:
1859 	if (watcher->rd == watcher->wr) {
1860 		if (watcher->flags & WATCHER_CLOSING) {
1861 			OSAddAtomic(-1, &watcher->num_readers);
1862 			return 0;
1863 		}
1864 		OSAddAtomic(1, &watcher->blockers);
1865 
1866 		// there's nothing to do, go to sleep
1867 		error = tsleep((caddr_t)watcher, PUSER | PCATCH, "fsevents_empty", 0);
1868 
1869 		OSAddAtomic(-1, &watcher->blockers);
1870 
1871 		if (error != 0 || (watcher->flags & WATCHER_CLOSING)) {
1872 			OSAddAtomic(-1, &watcher->num_readers);
1873 			return error;
1874 		}
1875 	}
1876 
1877 	// if we dropped events, return that as an event first
1878 	if (watcher->flags & WATCHER_DROPPED_EVENTS) {
1879 		int32_t val = FSE_EVENTS_DROPPED;
1880 
1881 		error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
1882 		if (error == 0) {
1883 			val = 0; // a fake pid
1884 			error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
1885 
1886 			tmp16 = FSE_ARG_DONE; // makes it a consistent msg
1887 			error = uiomove((caddr_t)&tmp16, sizeof(int16_t), uio);
1888 
1889 			last_full_event_resid = uio_resid(uio);
1890 		}
1891 
1892 		if (error) {
1893 			OSAddAtomic(-1, &watcher->num_readers);
1894 			return error;
1895 		}
1896 
1897 		watcher->flags &= ~WATCHER_DROPPED_EVENTS;
1898 	}
1899 
1900 	skipped = 0;
1901 
1902 	lck_rw_lock_shared(&event_handling_lock);
1903 	while (uio_resid(uio) > 0 && watcher->rd != watcher->wr) {
1904 		if (watcher->flags & WATCHER_CLOSING) {
1905 			break;
1906 		}
1907 
1908 		//
1909 		// check if the event is something of interest to us
1910 		// (since it may have been recycled/reused and changed
1911 		// its type or which device it is for)
1912 		//
1913 		kfse = watcher->event_queue[watcher->rd];
1914 		if (!kfse || kfse->type == FSE_INVALID || kfse->type >= watcher->num_events || kfse->refcount < 1) {
1915 			break;
1916 		}
1917 
1918 		if (watcher->event_list[kfse->type] == FSE_REPORT) {
1919 			if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) &&
1920 			    kfse->type != FSE_DOCID_CREATED &&
1921 			    kfse->type != FSE_DOCID_CHANGED &&
1922 			    kfse->type != FSE_ACCESS_GRANTED &&
1923 			    is_ignored_directory(kfse->regular_event.str)) {
1924 				// If this is not an Apple System Service, skip specified directories
1925 				// radar://12034844
1926 				error = 0;
1927 				skipped = 1;
1928 			} else {
1929 				skipped = 0;
1930 				if (last_event_ptr == kfse) {
1931 					last_event_ptr = NULL;
1932 					last_event_type = -1;
1933 					last_coalesced_time = 0;
1934 				}
1935 				error = copy_out_kfse(watcher, kfse, uio);
1936 				if (error != 0) {
1937 					// if an event won't fit or encountered an error while
1938 					// we were copying it out, then backup to the last full
1939 					// event and just bail out.  if the error was ENOENT
1940 					// then we can continue regular processing, otherwise
1941 					// we should unlock things and return.
1942 					uio_setresid(uio, last_full_event_resid);
1943 					if (error != ENOENT) {
1944 						lck_rw_unlock_shared(&event_handling_lock);
1945 						error = 0;
1946 						goto get_out;
1947 					}
1948 				}
1949 
1950 				last_full_event_resid = uio_resid(uio);
1951 			}
1952 		}
1953 
1954 		watcher->event_queue[watcher->rd] = NULL;
1955 		watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1956 		OSSynchronizeIO();
1957 		release_event_ref(kfse);
1958 	}
1959 	lck_rw_unlock_shared(&event_handling_lock);
1960 
1961 	if (skipped && error == 0) {
1962 		goto restart_watch;
1963 	}
1964 
1965 get_out:
1966 	OSAddAtomic(-1, &watcher->num_readers);
1967 
1968 	return error;
1969 }
1970 
1971 
1972 //
1973 // Shoo watchers away from a volume that's about to be unmounted
1974 // (so that it can be cleanly unmounted).
1975 //
1976 void
fsevent_unmount(__unused struct mount * mp,__unused vfs_context_t ctx)1977 fsevent_unmount(__unused struct mount *mp, __unused vfs_context_t ctx)
1978 {
1979 #if !defined(XNU_TARGET_OS_OSX)
1980 	dev_t dev = mp->mnt_vfsstat.f_fsid.val[0];
1981 	int error, waitcount = 0;
1982 	struct timespec ts = {.tv_sec = 1, .tv_nsec = 0};
1983 
1984 	// wait for any other pending unmounts to complete
1985 	lock_watch_table();
1986 	while (fsevent_unmount_dev != 0) {
1987 		error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_wait", &ts);
1988 		if (error == EWOULDBLOCK) {
1989 			error = 0;
1990 		}
1991 		if (!error && (++waitcount >= 10)) {
1992 			error = EWOULDBLOCK;
1993 			printf("timeout waiting to signal unmount pending for dev %d (fsevent_unmount_dev %d)\n", dev, fsevent_unmount_dev);
1994 		}
1995 		if (error) {
1996 			// there's a problem, bail out
1997 			unlock_watch_table();
1998 			return;
1999 		}
2000 	}
2001 	if (fs_event_type_watchers[FSE_UNMOUNT_PENDING] == 0) {
2002 		// nobody watching for unmount pending events
2003 		unlock_watch_table();
2004 		return;
2005 	}
2006 	// this is now the current unmount pending
2007 	fsevent_unmount_dev = dev;
2008 	fsevent_unmount_ack_count = fs_event_type_watchers[FSE_UNMOUNT_PENDING];
2009 	unlock_watch_table();
2010 
2011 	// send an event to notify the watcher they need to get off the mount
2012 	error = add_fsevent(FSE_UNMOUNT_PENDING, ctx, FSE_ARG_DEV, dev, FSE_ARG_DONE);
2013 
2014 	// wait for acknowledgment(s) (give up if it takes too long)
2015 	lock_watch_table();
2016 	waitcount = 0;
2017 	while (fsevent_unmount_dev == dev) {
2018 		error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_pending", &ts);
2019 		if (error == EWOULDBLOCK) {
2020 			error = 0;
2021 		}
2022 		if (!error && (++waitcount >= 10)) {
2023 			error = EWOULDBLOCK;
2024 			printf("unmount pending ack timeout for dev %d\n", dev);
2025 		}
2026 		if (error) {
2027 			// there's a problem, bail out
2028 			if (fsevent_unmount_dev == dev) {
2029 				fsevent_unmount_dev = 0;
2030 				fsevent_unmount_ack_count = 0;
2031 			}
2032 			wakeup((caddr_t)&fsevent_unmount_dev);
2033 			break;
2034 		}
2035 	}
2036 	unlock_watch_table();
2037 #endif /* ! XNU_TARGET_OS_OSX */
2038 }
2039 
2040 
2041 //
2042 // /dev/fsevents device code
2043 //
2044 static int fsevents_installed = 0;   // set to 1 by fsevents_init(); reset to 0 there if cdevsw_add() fails
2045 
2046 typedef struct fsevent_handle {      // per-descriptor state stored as the file data of a cloned fsevents fd
2047 	UInt32            flags;         // FSEH_* bits; FSEH_CLOSING is OR-ed in atomically by fseventsf_close()
2048 	SInt32            active;        // number of fseventsf_ioctl() calls in flight; fseventsf_close() waits for it to reach 0
2049 	fs_event_watcher *watcher;       // filled in by add_watcher() in fseventsioctl(); set to NULL in fseventsf_close()
2050 	struct klist      knotes;        // knotes attached by fseventsf_kqfilter(), posted to by fsevents_wakeup()
2051 	struct selinfo    si;            // select state used with selrecord()/selwakeup()/selthreadclear()
2052 } fsevent_handle;
2053 
2054 #define FSEH_CLOSING   0x0001        // handle is being closed; fseventsf_ioctl() returns 0 without doing work when set
2055 
2056 static int
fseventsf_read(struct fileproc * fp,struct uio * uio,__unused int flags,__unused vfs_context_t ctx)2057 fseventsf_read(struct fileproc *fp, struct uio *uio,
2058     __unused int flags, __unused vfs_context_t ctx)
2059 {
2060 	fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2061 	int error;
2062 
2063 	error = fmod_watch(fseh->watcher, uio);
2064 
2065 	return error;
2066 }
2067 
2068 
2069 #pragma pack(push, 4)
2070 typedef struct fsevent_dev_filter_args32 {      // FSEVENTS_DEVICE_FILTER_32 ioctl argument (rejected for 64-bit callers)
2071 	uint32_t            num_devices;            // entries in the array below; fseventsf_ioctl() rejects values > 256
2072 	user32_addr_t       devices;                // user address of a dev_t array, read with copyin()
2073 } fsevent_dev_filter_args32;
2074 typedef struct fsevent_dev_filter_args64 {      // FSEVENTS_DEVICE_FILTER_64 ioctl argument (rejected for 32-bit callers)
2075 	uint32_t            num_devices;            // same meaning as in the 32-bit layout
2076 	user64_addr_t       devices;                // same meaning as in the 32-bit layout
2077 } fsevent_dev_filter_args64;
2078 #pragma pack(pop)
2079 
2080 #define FSEVENTS_DEVICE_FILTER_32       _IOW('s', 100, fsevent_dev_filter_args32)
2081 #define FSEVENTS_DEVICE_FILTER_64       _IOW('s', 100, fsevent_dev_filter_args64)
2082 
2083 static int
fseventsf_ioctl(struct fileproc * fp,u_long cmd,caddr_t data,vfs_context_t ctx)2084 fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx)
2085 {
2086 	fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2087 	int ret = 0;
2088 	fsevent_dev_filter_args64 *devfilt_args, _devfilt_args;
2089 
2090 	OSAddAtomic(1, &fseh->active);
2091 	if (fseh->flags & FSEH_CLOSING) {
2092 		OSAddAtomic(-1, &fseh->active);
2093 		return 0;
2094 	}
2095 
2096 	switch (cmd) {
2097 	case FIONBIO:
2098 	case FIOASYNC:
2099 		break;
2100 
2101 	case FSEVENTS_WANT_COMPACT_EVENTS: {
2102 		fseh->watcher->flags |= WATCHER_WANTS_COMPACT_EVENTS;
2103 		break;
2104 	}
2105 
2106 	case FSEVENTS_WANT_EXTENDED_INFO: {
2107 		fseh->watcher->flags |= WATCHER_WANTS_EXTENDED_INFO;
2108 		break;
2109 	}
2110 
2111 	case FSEVENTS_GET_CURRENT_ID: {
2112 		*(uint64_t *)data = fseh->watcher->max_event_id;
2113 		ret = 0;
2114 		break;
2115 	}
2116 
2117 	case FSEVENTS_DEVICE_FILTER_32: {
2118 		if (proc_is64bit(vfs_context_proc(ctx))) {
2119 			ret = EINVAL;
2120 			break;
2121 		}
2122 		fsevent_dev_filter_args32 *devfilt_args32 = (fsevent_dev_filter_args32 *)data;
2123 
2124 		devfilt_args = &_devfilt_args;
2125 		memset(devfilt_args, 0, sizeof(fsevent_dev_filter_args64));
2126 		devfilt_args->num_devices = devfilt_args32->num_devices;
2127 		devfilt_args->devices     = CAST_USER_ADDR_T(devfilt_args32->devices);
2128 		goto handle_dev_filter;
2129 	}
2130 
2131 	case FSEVENTS_DEVICE_FILTER_64:
2132 		if (!proc_is64bit(vfs_context_proc(ctx))) {
2133 			ret = EINVAL;
2134 			break;
2135 		}
2136 		devfilt_args = (fsevent_dev_filter_args64 *)data;
2137 
2138 handle_dev_filter:
2139 		{
2140 			int new_num_devices, old_num_devices = 0;
2141 			dev_t *devices_not_to_watch, *tmp = NULL;
2142 
2143 			if (devfilt_args->num_devices > 256) {
2144 				ret = EINVAL;
2145 				break;
2146 			}
2147 
2148 			new_num_devices = devfilt_args->num_devices;
2149 			if (new_num_devices == 0) {
2150 				lock_watch_table();
2151 
2152 				tmp = fseh->watcher->devices_not_to_watch;
2153 				fseh->watcher->devices_not_to_watch = NULL;
2154 				old_num_devices = fseh->watcher->num_devices;
2155 				fseh->watcher->num_devices = new_num_devices;
2156 
2157 				unlock_watch_table();
2158 				kfree_data(tmp, old_num_devices * sizeof(dev_t));
2159 				break;
2160 			}
2161 
2162 			devices_not_to_watch = kalloc_data(new_num_devices * sizeof(dev_t), Z_WAITOK);
2163 			if (devices_not_to_watch == NULL) {
2164 				ret = ENOMEM;
2165 				break;
2166 			}
2167 
2168 			ret = copyin((user_addr_t)devfilt_args->devices,
2169 			    (void *)devices_not_to_watch,
2170 			    new_num_devices * sizeof(dev_t));
2171 			if (ret) {
2172 				kfree_data(devices_not_to_watch, new_num_devices * sizeof(dev_t));
2173 				break;
2174 			}
2175 
2176 			lock_watch_table();
2177 			old_num_devices = fseh->watcher->num_devices;
2178 			fseh->watcher->num_devices = new_num_devices;
2179 			tmp = fseh->watcher->devices_not_to_watch;
2180 			fseh->watcher->devices_not_to_watch = devices_not_to_watch;
2181 			unlock_watch_table();
2182 
2183 			kfree_data(tmp, old_num_devices * sizeof(dev_t));
2184 
2185 			break;
2186 		}
2187 
2188 	case FSEVENTS_UNMOUNT_PENDING_ACK: {
2189 		lock_watch_table();
2190 		dev_t dev = *(dev_t *)data;
2191 		if (fsevent_unmount_dev == dev) {
2192 			if (--fsevent_unmount_ack_count <= 0) {
2193 				fsevent_unmount_dev = 0;
2194 				wakeup((caddr_t)&fsevent_unmount_dev);
2195 			}
2196 		} else {
2197 			printf("unexpected unmount pending ack %d (%d)\n", dev, fsevent_unmount_dev);
2198 			ret = EINVAL;
2199 		}
2200 		unlock_watch_table();
2201 		break;
2202 	}
2203 
2204 	default:
2205 		ret = EINVAL;
2206 		break;
2207 	}
2208 
2209 	OSAddAtomic(-1, &fseh->active);
2210 	return ret;
2211 }
2212 
2213 
2214 static int
fseventsf_select(struct fileproc * fp,int which,__unused void * wql,vfs_context_t ctx)2215 fseventsf_select(struct fileproc *fp, int which, __unused void *wql, vfs_context_t ctx)
2216 {
2217 	fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2218 	int ready = 0;
2219 
2220 	if ((which != FREAD) || (fseh->watcher->flags & WATCHER_CLOSING)) {
2221 		return 0;
2222 	}
2223 
2224 
2225 	// if there's nothing in the queue, we're not ready
2226 	if (fseh->watcher->rd != fseh->watcher->wr) {
2227 		ready = 1;
2228 	}
2229 
2230 	if (!ready) {
2231 		lock_watch_table();
2232 		selrecord(vfs_context_proc(ctx), &fseh->si, wql);
2233 		unlock_watch_table();
2234 	}
2235 
2236 	return ready;
2237 }
2238 
2239 
#if NOTUSED
/*
 * Stat handler placeholder, compiled only when NOTUSED is defined
 * non-zero.  Ignores its arguments and reports ENOTSUP.
 */
static int
fseventsf_stat(__unused struct fileproc *fp, __unused struct stat *sb, __unused vfs_context_t ctx)
{
	return ENOTSUP;
}
#endif
2247 
2248 static int
fseventsf_close(struct fileglob * fg,__unused vfs_context_t ctx)2249 fseventsf_close(struct fileglob *fg, __unused vfs_context_t ctx)
2250 {
2251 	fsevent_handle *fseh = (struct fsevent_handle *)fg_get_data(fg);
2252 	fs_event_watcher *watcher;
2253 
2254 	OSBitOrAtomic(FSEH_CLOSING, &fseh->flags);
2255 	while (OSAddAtomic(0, &fseh->active) > 0) {
2256 		tsleep((caddr_t)fseh->watcher, PRIBIO, "fsevents-close", 1);
2257 	}
2258 
2259 	watcher = fseh->watcher;
2260 	fg_set_data(fg, NULL);
2261 	fseh->watcher = NULL;
2262 
2263 	remove_watcher(watcher);
2264 	selthreadclear(&fseh->si);
2265 	kfree_type(fsevent_handle, fseh);
2266 
2267 	return 0;
2268 }
2269 
2270 static void
filt_fsevent_detach(struct knote * kn)2271 filt_fsevent_detach(struct knote *kn)
2272 {
2273 	fsevent_handle *fseh = (struct fsevent_handle *)knote_kn_hook_get_raw(kn);
2274 
2275 	lock_watch_table();
2276 
2277 	KNOTE_DETACH(&fseh->knotes, kn);
2278 
2279 	unlock_watch_table();
2280 }
2281 
2282 /*
2283  * Determine whether this knote should be active
2284  *
2285  * This is kind of subtle.
2286  *      --First, notice if the vnode has been revoked: in so, override hint
2287  *      --EVFILT_READ knotes are checked no matter what the hint is
2288  *      --Other knotes activate based on hint.
2289  *      --If hint is revoke, set special flags and activate
2290  */
2291 static int
filt_fsevent_common(struct knote * kn,struct kevent_qos_s * kev,long hint)2292 filt_fsevent_common(struct knote *kn, struct kevent_qos_s *kev, long hint)
2293 {
2294 	fsevent_handle *fseh = (struct fsevent_handle *)knote_kn_hook_get_raw(kn);
2295 	int activate = 0;
2296 	int32_t rd, wr, amt;
2297 	int64_t data = 0;
2298 
2299 	if (NOTE_REVOKE == hint) {
2300 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2301 		activate = 1;
2302 	}
2303 
2304 	rd = fseh->watcher->rd;
2305 	wr = fseh->watcher->wr;
2306 	if (rd <= wr) {
2307 		amt = wr - rd;
2308 	} else {
2309 		amt = fseh->watcher->eventq_size - (rd - wr);
2310 	}
2311 
2312 	switch (kn->kn_filter) {
2313 	case EVFILT_READ:
2314 		data = amt;
2315 		activate = (data != 0);
2316 		break;
2317 	case EVFILT_VNODE:
2318 		/* Check events this note matches against the hint */
2319 		if (kn->kn_sfflags & hint) {
2320 			kn->kn_fflags |= (uint32_t)hint;     /* Set which event occurred */
2321 		}
2322 		if (kn->kn_fflags != 0) {
2323 			activate = 1;
2324 		}
2325 		break;
2326 	default:
2327 		// nothing to do...
2328 		break;
2329 	}
2330 
2331 	if (activate && kev) {
2332 		knote_fill_kevent(kn, kev, data);
2333 	}
2334 	return activate;
2335 }
2336 
2337 static int
filt_fsevent(struct knote * kn,long hint)2338 filt_fsevent(struct knote *kn, long hint)
2339 {
2340 	return filt_fsevent_common(kn, NULL, hint);
2341 }
2342 
2343 static int
filt_fsevent_touch(struct knote * kn,struct kevent_qos_s * kev)2344 filt_fsevent_touch(struct knote *kn, struct kevent_qos_s *kev)
2345 {
2346 	int res;
2347 
2348 	lock_watch_table();
2349 
2350 	/* accept new fflags/data as saved */
2351 	kn->kn_sfflags = kev->fflags;
2352 	kn->kn_sdata = kev->data;
2353 
2354 	/* restrict the current results to the (smaller?) set of new interest */
2355 	/*
2356 	 * For compatibility with previous implementations, we leave kn_fflags
2357 	 * as they were before.
2358 	 */
2359 	//kn->kn_fflags &= kev->fflags;
2360 
2361 	/* determine if the filter is now fired */
2362 	res = filt_fsevent_common(kn, NULL, 0);
2363 
2364 	unlock_watch_table();
2365 
2366 	return res;
2367 }
2368 
/*
 * f_process hook: evaluate the knote with no hint under the watch table
 * lock, letting filt_fsevent_common() fill in `kev` when it is active.
 * Returns non-zero if the knote is active.
 */
static int
filt_fsevent_process(struct knote *kn, struct kevent_qos_s *kev)
{
	int fired;

	lock_watch_table();
	fired = filt_fsevent_common(kn, kev, 0);
	unlock_watch_table();

	return fired;
}
2382 
2383 SECURITY_READ_ONLY_EARLY(struct  filterops) fsevent_filtops = {
2384 	.f_isfd = 1,
2385 	.f_attach = NULL,
2386 	.f_detach = filt_fsevent_detach,
2387 	.f_event = filt_fsevent,
2388 	.f_touch = filt_fsevent_touch,
2389 	.f_process = filt_fsevent_process,
2390 };
2391 
2392 static int
fseventsf_kqfilter(struct fileproc * fp,struct knote * kn,__unused struct kevent_qos_s * kev)2393 fseventsf_kqfilter(struct fileproc *fp, struct knote *kn,
2394     __unused struct kevent_qos_s *kev)
2395 {
2396 	fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2397 	int res;
2398 
2399 	kn->kn_filtid = EVFILTID_FSEVENT;
2400 	knote_kn_hook_set_raw(kn, (void *) fseh);
2401 
2402 	lock_watch_table();
2403 
2404 	KNOTE_ATTACH(&fseh->knotes, kn);
2405 
2406 	/* check to see if it is fired already */
2407 	res = filt_fsevent_common(kn, NULL, 0);
2408 
2409 	unlock_watch_table();
2410 
2411 	return res;
2412 }
2413 
2414 
2415 static int
fseventsf_drain(struct fileproc * fp,__unused vfs_context_t ctx)2416 fseventsf_drain(struct fileproc *fp, __unused vfs_context_t ctx)
2417 {
2418 	int counter = 0;
2419 	fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2420 
2421 	// if there are people still waiting, sleep for 10ms to
2422 	// let them clean up and get out of there.  however we
2423 	// also don't want to get stuck forever so if they don't
2424 	// exit after 5 seconds we're tearing things down anyway.
2425 	while (fseh->watcher->blockers && counter++ < 500) {
2426 		// issue wakeup in case anyone is blocked waiting for an event
2427 		// do this each time we wakeup in case the blocker missed
2428 		// the wakeup due to the unprotected test of WATCHER_CLOSING
2429 		// and decision to tsleep in fmod_watch... this bit of
2430 		// latency is a decent tradeoff against not having to
2431 		// take and drop a lock in fmod_watch
2432 		lock_watch_table();
2433 		fsevents_wakeup(fseh->watcher);
2434 		unlock_watch_table();
2435 
2436 		tsleep((caddr_t)fseh->watcher, PRIBIO, "watcher-close", 1);
2437 	}
2438 
2439 	return 0;
2440 }
2441 
2442 
2443 static int
fseventsopen(__unused dev_t dev,__unused int flag,__unused int mode,__unused struct proc * p)2444 fseventsopen(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2445 {
2446 	if (!kauth_cred_issuser(kauth_cred_get())) {
2447 		return EPERM;
2448 	}
2449 
2450 	return 0;
2451 }
2452 
2453 static int
fseventsclose(__unused dev_t dev,__unused int flag,__unused int mode,__unused struct proc * p)2454 fseventsclose(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2455 {
2456 	return 0;
2457 }
2458 
2459 static int
fseventsread(__unused dev_t dev,__unused struct uio * uio,__unused int ioflag)2460 fseventsread(__unused dev_t dev, __unused struct uio *uio, __unused int ioflag)
2461 {
2462 	return EIO;
2463 }
2464 
2465 
2466 static int
parse_buffer_and_add_events(const char * buffer,size_t bufsize,vfs_context_t ctx,size_t * remainder)2467 parse_buffer_and_add_events(const char *buffer, size_t bufsize, vfs_context_t ctx, size_t *remainder)
2468 {
2469 	const fse_info *finfo, *dest_finfo;
2470 	const char *path, *ptr, *dest_path, *event_start = buffer;
2471 	size_t path_len, dest_path_len;
2472 	int type, err = 0;
2473 
2474 
2475 	ptr = buffer;
2476 	while ((ptr + sizeof(int) + sizeof(fse_info) + 1) < buffer + bufsize) {
2477 		type = *(const int *)ptr;
2478 		if (type < 0 || type == FSE_ACCESS_GRANTED ||
2479 		    type >= FSE_MAX_EVENTS) {
2480 			err = EINVAL;
2481 			break;
2482 		}
2483 
2484 		ptr += sizeof(int);
2485 
2486 		finfo = (const fse_info *)ptr;
2487 		ptr += sizeof(fse_info);
2488 
2489 		path = ptr;
2490 		while (ptr < buffer + bufsize && *ptr != '\0') {
2491 			ptr++;
2492 		}
2493 
2494 		if (ptr >= buffer + bufsize) {
2495 			break;
2496 		}
2497 
2498 		ptr++; // advance over the trailing '\0'
2499 
2500 		path_len = ptr - path;
2501 
2502 		if (type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CLONE) {
2503 			event_start = ptr; // record where the next event starts
2504 
2505 			err = add_fsevent(type, ctx, FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, FSE_ARG_DONE);
2506 			if (err) {
2507 				break;
2508 			}
2509 			continue;
2510 		}
2511 
2512 		//
2513 		// if we're here we have to slurp up the destination finfo
2514 		// and path so that we can pass them to the add_fsevent()
2515 		// call.  basically it's a copy of the above code.
2516 		//
2517 		dest_finfo = (const fse_info *)ptr;
2518 		ptr += sizeof(fse_info);
2519 
2520 		dest_path = ptr;
2521 		while (ptr < buffer + bufsize && *ptr != '\0') {
2522 			ptr++;
2523 		}
2524 
2525 		if (ptr >= buffer + bufsize) {
2526 			break;
2527 		}
2528 
2529 		ptr++;       // advance over the trailing '\0'
2530 		event_start = ptr; // record where the next event starts
2531 
2532 		dest_path_len = ptr - dest_path;
2533 		//
2534 		// If the destination inode number is non-zero, generate a rename
2535 		// with both source and destination FSE_ARG_FINFO. Otherwise generate
2536 		// a rename with only one FSE_ARG_FINFO. If you need to inject an
2537 		// exchange with an inode of zero, just make that inode (and its path)
2538 		// come in as the first one, not the second.
2539 		//
2540 		if (dest_finfo->ino) {
2541 			err = add_fsevent(type, ctx,
2542 			    FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2543 			    FSE_ARG_STRING, dest_path_len, dest_path, FSE_ARG_FINFO, dest_finfo,
2544 			    FSE_ARG_DONE);
2545 		} else {
2546 			err = add_fsevent(type, ctx,
2547 			    FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2548 			    FSE_ARG_STRING, dest_path_len, dest_path,
2549 			    FSE_ARG_DONE);
2550 		}
2551 
2552 		if (err) {
2553 			break;
2554 		}
2555 	}
2556 
2557 	// if the last event wasn't complete, set the remainder
2558 	// to be the last event start boundary.
2559 	//
2560 	*remainder = (long)((buffer + bufsize) - event_start);
2561 
2562 	return err;
2563 }
2564 
2565 
2566 //
2567 // Note: this buffer size can not ever be less than
2568 //       2*MAXPATHLEN + 2*sizeof(fse_info) + sizeof(int)
2569 //       because that is the max size for a single event.
2570 //       I made it 4k to be a "nice" size.  making it
2571 //       smaller is not a good idea.
2572 //
2573 #define WRITE_BUFFER_SIZE  4096
2574 static char *write_buffer = NULL;
2575 
2576 static int
fseventswrite(__unused dev_t dev,struct uio * uio,__unused int ioflag)2577 fseventswrite(__unused dev_t dev, struct uio *uio, __unused int ioflag)
2578 {
2579 	int error = 0;
2580 	size_t count, offset = 0, remainder = 0;
2581 	vfs_context_t ctx = vfs_context_current();
2582 
2583 	lck_mtx_lock(&event_writer_lock);
2584 
2585 	if (write_buffer == NULL) {
2586 		write_buffer = zalloc_permanent(WRITE_BUFFER_SIZE, ZALIGN_64);
2587 	}
2588 
2589 	//
2590 	// this loop copies in and processes the events written.
2591 	// it takes care to copy in reasonable size chunks and
2592 	// process them.  if there is an event that spans a chunk
2593 	// boundary we're careful to copy those bytes down to the
2594 	// beginning of the buffer and read the next chunk in just
2595 	// after it.
2596 	//
2597 	while (uio_resid(uio)) {
2598 		count = MIN(WRITE_BUFFER_SIZE - offset, (size_t)uio_resid(uio));
2599 
2600 		error = uiomove(write_buffer + offset, (int)count, uio);
2601 		if (error) {
2602 			break;
2603 		}
2604 
2605 		error = parse_buffer_and_add_events(write_buffer, offset + count, ctx, &remainder);
2606 		if (error) {
2607 			break;
2608 		}
2609 
2610 		//
2611 		// if there's any remainder, copy it down to the beginning
2612 		// of the buffer so that it will get processed the next time
2613 		// through the loop.  note that the remainder always starts
2614 		// at an event boundary.
2615 		//
2616 		memmove(write_buffer, (write_buffer + count + offset) - remainder, remainder);
2617 		offset = remainder;
2618 	}
2619 
2620 	lck_mtx_unlock(&event_writer_lock);
2621 
2622 	return error;
2623 }
2624 
2625 
2626 static const struct fileops fsevents_fops = {
2627 	.fo_type     = DTYPE_FSEVENTS,
2628 	.fo_read     = fseventsf_read,
2629 	.fo_write    = fo_no_write,
2630 	.fo_ioctl    = fseventsf_ioctl,
2631 	.fo_select   = fseventsf_select,
2632 	.fo_close    = fseventsf_close,
2633 	.fo_kqfilter = fseventsf_kqfilter,
2634 	.fo_drain    = fseventsf_drain,
2635 };
2636 
2637 typedef struct fsevent_clone_args32 {       // FSEVENTS_CLONE_32 ioctl argument (rejected for 64-bit callers)
2638 	user32_addr_t       event_list;         // user address of an int8_t array with num_events entries, read with copyin()
2639 	int32_t             num_events;         // fseventsioctl() requires 1..4096
2640 	int32_t             event_queue_depth;  // passed through to add_watcher()
2641 	user32_addr_t       fd;                 // user address of the int32_t that receives the new descriptor
2642 } fsevent_clone_args32;
2643 
2644 typedef struct fsevent_clone_args64 {       // FSEVENTS_CLONE_64 ioctl argument (rejected for 32-bit callers); same fields as above
2645 	user64_addr_t       event_list;
2646 	int32_t             num_events;
2647 	int32_t             event_queue_depth;
2648 	user64_addr_t       fd;
2649 } fsevent_clone_args64;
2650 
2651 #define FSEVENTS_CLONE_32       _IOW('s', 1, fsevent_clone_args32)
2652 #define FSEVENTS_CLONE_64       _IOW('s', 1, fsevent_clone_args64)
2653 
2654 static int
fseventsioctl(__unused dev_t dev,u_long cmd,caddr_t data,__unused int flag,struct proc * p)2655 fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p)
2656 {
2657 	struct fileproc *f;
2658 	int fd, error;
2659 	fsevent_handle *fseh = NULL;
2660 	fsevent_clone_args64 *fse_clone_args, _fse_clone;
2661 	int8_t *event_list;
2662 	int is64bit = proc_is64bit(p);
2663 
2664 	switch (cmd) {
2665 	case FSEVENTS_CLONE_32: {
2666 		if (is64bit) {
2667 			return EINVAL;
2668 		}
2669 		fsevent_clone_args32 *args32 = (fsevent_clone_args32 *)data;
2670 
2671 		fse_clone_args = &_fse_clone;
2672 		memset(fse_clone_args, 0, sizeof(fsevent_clone_args64));
2673 
2674 		fse_clone_args->event_list        = CAST_USER_ADDR_T(args32->event_list);
2675 		fse_clone_args->num_events        = args32->num_events;
2676 		fse_clone_args->event_queue_depth = args32->event_queue_depth;
2677 		fse_clone_args->fd                = CAST_USER_ADDR_T(args32->fd);
2678 		goto handle_clone;
2679 	}
2680 
2681 	case FSEVENTS_CLONE_64:
2682 		if (!is64bit) {
2683 			return EINVAL;
2684 		}
2685 		fse_clone_args = (fsevent_clone_args64 *)data;
2686 
2687 handle_clone:
2688 		if (fse_clone_args->num_events <= 0 || fse_clone_args->num_events > 4096) {
2689 			return EINVAL;
2690 		}
2691 
2692 		fseh = kalloc_type(fsevent_handle, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2693 
2694 		klist_init(&fseh->knotes);
2695 
2696 		event_list = kalloc_data(fse_clone_args->num_events * sizeof(int8_t), Z_WAITOK);
2697 		if (event_list == NULL) {
2698 			kfree_type(fsevent_handle, fseh);
2699 			return ENOMEM;
2700 		}
2701 
2702 		error = copyin((user_addr_t)fse_clone_args->event_list,
2703 		    (void *)event_list,
2704 		    fse_clone_args->num_events * sizeof(int8_t));
2705 		if (error) {
2706 			kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2707 			kfree_type(fsevent_handle, fseh);
2708 			return error;
2709 		}
2710 
2711 		/*
2712 		 * Lock down the user's "fd" result buffer so it's safe
2713 		 * to hold locks while we copy it out.
2714 		 */
2715 		error = vslock((user_addr_t)fse_clone_args->fd,
2716 		    sizeof(int32_t));
2717 		if (error) {
2718 			kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2719 			kfree_type(fsevent_handle, fseh);
2720 			return error;
2721 		}
2722 
2723 		error = add_watcher(event_list,
2724 		    fse_clone_args->num_events,
2725 		    fse_clone_args->event_queue_depth,
2726 		    &fseh->watcher,
2727 		    fseh);
2728 		if (error) {
2729 			vsunlock((user_addr_t)fse_clone_args->fd,
2730 			    sizeof(int32_t), 0);
2731 			kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2732 			kfree_type(fsevent_handle, fseh);
2733 			return error;
2734 		}
2735 
2736 		fseh->watcher->fseh = fseh;
2737 
2738 		error = falloc(p, &f, &fd, vfs_context_current());
2739 		if (error) {
2740 			remove_watcher(fseh->watcher);
2741 			vsunlock((user_addr_t)fse_clone_args->fd,
2742 			    sizeof(int32_t), 0);
2743 			kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2744 			kfree_type(fsevent_handle, fseh);
2745 			return error;
2746 		}
2747 		proc_fdlock(p);
2748 		f->fp_glob->fg_flag = FREAD | FWRITE;
2749 		f->fp_glob->fg_ops = &fsevents_fops;
2750 		fp_set_data(f, fseh);
2751 
2752 		/*
2753 		 * We can safely hold the proc_fdlock across this copyout()
2754 		 * because of the vslock() call above.  The vslock() call
2755 		 * also ensures that we will never get an error, so assert
2756 		 * this.
2757 		 */
2758 		error = copyout((void *)&fd, (user_addr_t)fse_clone_args->fd, sizeof(int32_t));
2759 		assert(error == 0);
2760 
2761 		procfdtbl_releasefd(p, fd, NULL);
2762 		fp_drop(p, fd, f, 1);
2763 		proc_fdunlock(p);
2764 
2765 		vsunlock((user_addr_t)fse_clone_args->fd,
2766 		    sizeof(int32_t), 1);
2767 		break;
2768 
2769 	default:
2770 		error = EINVAL;
2771 		break;
2772 	}
2773 
2774 	return error;
2775 }
2776 
2777 static void
fsevents_wakeup(fs_event_watcher * watcher)2778 fsevents_wakeup(fs_event_watcher *watcher)
2779 {
2780 	selwakeup(&watcher->fseh->si);
2781 	KNOTE(&watcher->fseh->knotes, NOTE_WRITE | NOTE_NONE);
2782 	wakeup((caddr_t)watcher);
2783 }
2784 
2785 
2786 /*
2787  * A struct describing which functions will get invoked for certain
2788  * actions.
2789  */
2790 static const struct cdevsw fsevents_cdevsw =
2791 {
2792 	.d_open = fseventsopen,
2793 	.d_close = fseventsclose,
2794 	.d_read = fseventsread,
2795 	.d_write = fseventswrite,
2796 	.d_ioctl = fseventsioctl,
2797 	.d_stop = eno_stop,
2798 	.d_reset = eno_reset,
2799 	.d_select = eno_select,
2800 	.d_mmap = eno_mmap,
2801 	.d_strategy = eno_strat,
2802 	.d_reserved_1 = eno_getc,
2803 	.d_reserved_2 = eno_putc,
2804 };
2805 
2806 
2807 /*
2808  * Called to initialize our device,
2809  * and to register ourselves with devfs
2810  */
2811 
2812 void
fsevents_init(void)2813 fsevents_init(void)
2814 {
2815 	int ret;
2816 
2817 	if (fsevents_installed) {
2818 		return;
2819 	}
2820 
2821 	fsevents_installed = 1;
2822 
2823 	ret = cdevsw_add(-1, &fsevents_cdevsw);
2824 	if (ret < 0) {
2825 		fsevents_installed = 0;
2826 		return;
2827 	}
2828 
2829 	devfs_make_node(makedev(ret, 0), DEVFS_CHAR,
2830 	    UID_ROOT, GID_WHEEL, 0644, "fsevents");
2831 
2832 	fsevents_internal_init();
2833 }
2834 
2835 
2836 char *
get_pathbuff(void)2837 get_pathbuff(void)
2838 {
2839 	return zalloc(ZV_NAMEI);
2840 }
2841 
2842 void
release_pathbuff(char * path)2843 release_pathbuff(char *path)
2844 {
2845 	if (path == NULL) {
2846 		return;
2847 	}
2848 	zfree(ZV_NAMEI, path);
2849 }
2850 
2851 int
get_fse_info(struct vnode * vp,fse_info * fse,__unused vfs_context_t ctx)2852 get_fse_info(struct vnode *vp, fse_info *fse, __unused vfs_context_t ctx)
2853 {
2854 	struct vnode_attr va;
2855 
2856 	VATTR_INIT(&va);
2857 	VATTR_WANTED(&va, va_fsid);
2858 	va.va_vaflags |= VA_REALFSID;
2859 	VATTR_WANTED(&va, va_fileid);
2860 	VATTR_WANTED(&va, va_mode);
2861 	VATTR_WANTED(&va, va_uid);
2862 	VATTR_WANTED(&va, va_document_id);
2863 	if (vp->v_flag & VISHARDLINK) {
2864 		if (vp->v_type == VDIR) {
2865 			VATTR_WANTED(&va, va_dirlinkcount);
2866 		} else {
2867 			VATTR_WANTED(&va, va_nlink);
2868 		}
2869 	}
2870 
2871 	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
2872 		memset(fse, 0, sizeof(fse_info));
2873 		return -1;
2874 	}
2875 
2876 	return vnode_get_fse_info_from_vap(vp, fse, &va);
2877 }
2878 
2879 int
vnode_get_fse_info_from_vap(vnode_t vp,fse_info * fse,struct vnode_attr * vap)2880 vnode_get_fse_info_from_vap(vnode_t vp, fse_info *fse, struct vnode_attr *vap)
2881 {
2882 	fse->ino  = (ino64_t)vap->va_fileid;
2883 	fse->dev  = (dev_t)vap->va_fsid;
2884 	fse->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | vap->va_mode;
2885 	fse->uid  = (uid_t)vap->va_uid;
2886 	fse->document_id  = (uint32_t)vap->va_document_id;
2887 	if (vp->v_flag & VISHARDLINK) {
2888 		fse->mode |= FSE_MODE_HLINK;
2889 		if (vp->v_type == VDIR) {
2890 			fse->nlink = (uint64_t)vap->va_dirlinkcount;
2891 		} else {
2892 			fse->nlink = (uint64_t)vap->va_nlink;
2893 		}
2894 	}
2895 
2896 	return 0;
2897 }
2898 
2899 void
create_fsevent_from_kevent(vnode_t vp,uint32_t kevents,struct vnode_attr * vap)2900 create_fsevent_from_kevent(vnode_t vp, uint32_t kevents, struct vnode_attr *vap)
2901 {
2902 	int fsevent_type = FSE_CONTENT_MODIFIED, len; // the default is the most pessimistic
2903 	char pathbuf[MAXPATHLEN];
2904 	fse_info fse;
2905 
2906 
2907 	if (kevents & VNODE_EVENT_DELETE) {
2908 		fsevent_type = FSE_DELETE;
2909 	} else if (kevents & (VNODE_EVENT_EXTEND | VNODE_EVENT_WRITE)) {
2910 		fsevent_type = FSE_CONTENT_MODIFIED;
2911 	} else if (kevents & VNODE_EVENT_LINK) {
2912 		fsevent_type = FSE_CREATE_FILE;
2913 	} else if (kevents & VNODE_EVENT_RENAME) {
2914 		fsevent_type = FSE_CREATE_FILE; // XXXdbg - should use FSE_RENAME but we don't have the destination info;
2915 	} else if (kevents & (VNODE_EVENT_FILE_CREATED | VNODE_EVENT_FILE_REMOVED | VNODE_EVENT_DIR_CREATED | VNODE_EVENT_DIR_REMOVED)) {
2916 		fsevent_type = FSE_STAT_CHANGED; // XXXdbg - because vp is a dir and the thing created/removed lived inside it
2917 	} else { // a catch all for VNODE_EVENT_PERMS, VNODE_EVENT_ATTRIB and anything else
2918 		fsevent_type = FSE_STAT_CHANGED;
2919 	}
2920 
2921 	// printf("convert_kevent: kevents 0x%x fsevent type 0x%x (for %s)\n", kevents, fsevent_type, vp->v_name ? vp->v_name : "(no-name)");
2922 
2923 	fse.dev = vap->va_fsid;
2924 	fse.ino = vap->va_fileid;
2925 	fse.mode = vnode_vttoif(vnode_vtype(vp)) | (uint32_t)vap->va_mode;
2926 	if (vp->v_flag & VISHARDLINK) {
2927 		fse.mode |= FSE_MODE_HLINK;
2928 		if (vp->v_type == VDIR) {
2929 			fse.nlink = vap->va_dirlinkcount;
2930 		} else {
2931 			fse.nlink = vap->va_nlink;
2932 		}
2933 	}
2934 
2935 	if (vp->v_type == VDIR) {
2936 		fse.mode |= FSE_REMOTE_DIR_EVENT;
2937 	}
2938 
2939 
2940 	fse.uid = vap->va_uid;
2941 	fse.document_id = vap->va_document_id;
2942 
2943 	len = sizeof(pathbuf);
2944 	if (vn_getpath_no_firmlink(vp, pathbuf, &len) == 0) {
2945 		add_fsevent(fsevent_type, vfs_context_current(), FSE_ARG_STRING, len, pathbuf, FSE_ARG_FINFO, &fse, FSE_ARG_DONE);
2946 	}
2947 	return;
2948 }
2949 
2950 #else /* CONFIG_FSE */
2951 
2952 #include <sys/fsevents.h>
2953 
2954 /*
2955  * The get_pathbuff and release_pathbuff routines are used in places not
2956  * related to fsevents, and it's a handy abstraction, so define trivial
2957  * versions that don't cache a pool of buffers.  This way, we don't have
2958  * to conditionalize the callers, and they still get the advantage of the
2959  * pool of buffers if CONFIG_FSE is turned on.
2960  */
2961 char *
get_pathbuff(void)2962 get_pathbuff(void)
2963 {
2964 	return zalloc(ZV_NAMEI);
2965 }
2966 
2967 void
release_pathbuff(char * path)2968 release_pathbuff(char *path)
2969 {
2970 	zfree(ZV_NAMEI, path);
2971 }
2972 
2973 int
add_fsevent(__unused int type,__unused vfs_context_t ctx,...)2974 add_fsevent(__unused int type, __unused vfs_context_t ctx, ...)
2975 {
2976 	return 0;
2977 }
2978 
2979 int
need_fsevent(__unused int type,__unused vnode_t vp)2980 need_fsevent(__unused int type, __unused vnode_t vp)
2981 {
2982 	return 0;
2983 }
2984 
2985 #endif /* CONFIG_FSE */
2986