xref: /xnu-8019.80.24/bsd/vfs/vfs_fsevents.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2004-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #include <stdarg.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/event.h>         // for kqueue related stuff
32 #include <sys/fsevents.h>
33 
34 #if CONFIG_FSE
35 #include <sys/namei.h>
36 #include <sys/filedesc.h>
37 #include <sys/kernel.h>
38 #include <sys/file_internal.h>
39 #include <sys/stat.h>
40 #include <sys/vnode_internal.h>
41 #include <sys/mount_internal.h>
42 #include <sys/proc_internal.h>
43 #include <sys/kauth.h>
44 #include <sys/uio.h>
45 #include <kern/kalloc.h>
46 #include <sys/dirent.h>
47 #include <sys/attr.h>
48 #include <sys/sysctl.h>
49 #include <sys/ubc.h>
50 #include <machine/cons.h>
51 #include <miscfs/specfs/specdev.h>
52 #include <miscfs/devfs/devfs.h>
53 #include <sys/filio.h>
54 #include <kern/locks.h>
55 #include <libkern/OSAtomic.h>
56 #include <kern/zalloc.h>
57 #include <mach/mach_time.h>
58 #include <kern/thread_call.h>
59 #include <kern/clock.h>
60 #include <IOKit/IOBSD.h>
61 
62 #include <security/audit/audit.h>
63 #include <bsm/audit_kevents.h>
64 
65 #include <pexpert/pexpert.h>
66 #include <libkern/section_keywords.h>
67 
68 typedef struct kfs_event {
69 	LIST_ENTRY(kfs_event) kevent_list;
70 	int16_t        type;       // type code of this event
71 	u_int16_t      flags,      // per-event flags
72 	    len;                   // the length of the path in "str"
73 	int32_t        refcount;   // number of clients referencing this
74 	pid_t          pid;        // pid of the process that did the op
75 
76 	uint64_t       abstime;    // when this event happened (mach_absolute_time())
77 	ino64_t        ino;
78 	dev_t          dev;
79 	int32_t        mode;
80 	uid_t          uid;
81 	gid_t          gid;
82 
83 	const char    *str;
84 
85 	struct kfs_event *dest; // if this is a two-file op
86 } kfs_event;
87 
88 // flags for the flags field
89 #define KFSE_COMBINED_EVENTS          0x0001
90 #define KFSE_CONTAINS_DROPPED_EVENTS  0x0002
91 #define KFSE_RECYCLED_EVENT           0x0004
92 #define KFSE_BEING_CREATED            0x0008
93 
94 LIST_HEAD(kfse_list, kfs_event) kfse_list_head = LIST_HEAD_INITIALIZER(x);
95 int num_events_outstanding = 0;
96 int num_pending_rename = 0;
97 
98 
99 struct fsevent_handle;
100 
101 typedef struct fs_event_watcher {
102 	int8_t      *event_list;         // the events we're interested in
103 	int32_t      num_events;
104 	dev_t       *devices_not_to_watch;// report events from devices not in this list
105 	uint32_t     num_devices;
106 	int32_t      flags;
107 	kfs_event  **event_queue;
108 	int32_t      eventq_size;        // number of event pointers in queue
109 	int32_t      num_readers;
110 	int32_t      rd;                 // read index into the event_queue
111 	int32_t      wr;                 // write index into the event_queue
112 	int32_t      blockers;
113 	int32_t      my_id;
114 	uint32_t     num_dropped;
115 	uint64_t     max_event_id;
116 	struct fsevent_handle *fseh;
117 	pid_t        pid;
118 	char         proc_name[(2 * MAXCOMLEN) + 1];
119 } fs_event_watcher;
120 
121 // fs_event_watcher flags
122 #define WATCHER_DROPPED_EVENTS         0x0001
123 #define WATCHER_CLOSING                0x0002
124 #define WATCHER_WANTS_COMPACT_EVENTS   0x0004
125 #define WATCHER_WANTS_EXTENDED_INFO    0x0008
126 #define WATCHER_APPLE_SYSTEM_SERVICE   0x0010   // fseventsd, coreservicesd, mds, revisiond
127 
128 #define MAX_WATCHERS  8
129 static fs_event_watcher *watcher_table[MAX_WATCHERS];
130 
131 #define DEFAULT_MAX_KFS_EVENTS   4096
132 static int max_kfs_events = DEFAULT_MAX_KFS_EVENTS;
133 
134 // we allocate kfs_event structures out of this zone
135 static zone_t     event_zone;
136 static int        fs_event_init = 0;
137 
138 //
139 // this array records whether anyone is interested in a
140 // particular type of event.  if no one is, we bail out
141 // early from the event delivery
142 //
143 static int16_t     fs_event_type_watchers[FSE_MAX_EVENTS];
144 
145 // the device currently being unmounted:
146 static dev_t fsevent_unmount_dev = 0;
147 // how many ACKs are still outstanding:
148 static int fsevent_unmount_ack_count = 0;
149 
150 static int  watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse);
151 static void fsevents_wakeup(fs_event_watcher *watcher);
152 
153 //
154 // Locks
155 //
156 static LCK_ATTR_DECLARE(fsevent_lock_attr, 0, 0);
157 static LCK_GRP_DECLARE(fsevent_mutex_group, "fsevent-mutex");
158 static LCK_GRP_DECLARE(fsevent_rw_group, "fsevent-rw");
159 
160 static LCK_RW_DECLARE_ATTR(event_handling_lock, // handles locking for event manipulation and recycling
161     &fsevent_rw_group, &fsevent_lock_attr);
162 static LCK_MTX_DECLARE_ATTR(watch_table_lock,
163     &fsevent_mutex_group, &fsevent_lock_attr);
164 static LCK_MTX_DECLARE_ATTR(event_buf_lock,
165     &fsevent_mutex_group, &fsevent_lock_attr);
166 static LCK_MTX_DECLARE_ATTR(event_writer_lock,
167     &fsevent_mutex_group, &fsevent_lock_attr);
168 
169 
170 /* Explicitly declare qsort so compiler doesn't complain */
171 __private_extern__ void qsort(
172 	void * array,
173 	size_t nmembers,
174 	size_t member_size,
175 	int (*)(const void *, const void *));
176 
177 static int
is_ignored_directory(const char * path)178 is_ignored_directory(const char *path)
179 {
180 	if (!path) {
181 		return 0;
182 	}
183 
184 #define IS_TLD(x) strnstr(__DECONST(char *, path), x, MAXPATHLEN)
185 	if (IS_TLD("/.Spotlight-V100/") ||
186 	    IS_TLD("/.MobileBackups/") ||
187 	    IS_TLD("/Backups.backupdb/")) {
188 		return 1;
189 	}
190 #undef IS_TLD
191 
192 	return 0;
193 }
194 
195 static void
fsevents_internal_init(void)196 fsevents_internal_init(void)
197 {
198 	int i;
199 
200 	if (fs_event_init++ != 0) {
201 		return;
202 	}
203 
204 	for (i = 0; i < FSE_MAX_EVENTS; i++) {
205 		fs_event_type_watchers[i] = 0;
206 	}
207 
208 	memset(watcher_table, 0, sizeof(watcher_table));
209 
210 	PE_get_default("kern.maxkfsevents", &max_kfs_events, sizeof(max_kfs_events));
211 
212 	event_zone = zone_create_ext("fs-event-buf", sizeof(kfs_event),
213 	    ZC_NOGC | ZC_NOCALLOUT, ZONE_ID_ANY, ^(zone_t z) {
214 		// mark the zone as exhaustible so that it will not
215 		// ever grow beyond what we initially filled it with
216 		zone_set_exhaustible(z, max_kfs_events);
217 	});
218 
219 	zone_fill_initially(event_zone, max_kfs_events);
220 }
221 
222 static void
lock_watch_table(void)223 lock_watch_table(void)
224 {
225 	lck_mtx_lock(&watch_table_lock);
226 }
227 
228 static void
unlock_watch_table(void)229 unlock_watch_table(void)
230 {
231 	lck_mtx_unlock(&watch_table_lock);
232 }
233 
234 static void
lock_fs_event_list(void)235 lock_fs_event_list(void)
236 {
237 	lck_mtx_lock(&event_buf_lock);
238 }
239 
240 static void
unlock_fs_event_list(void)241 unlock_fs_event_list(void)
242 {
243 	lck_mtx_unlock(&event_buf_lock);
244 }
245 
246 // forward prototype
247 static void release_event_ref(kfs_event *kfse);
248 
249 static boolean_t
watcher_cares_about_dev(fs_event_watcher * watcher,dev_t dev)250 watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev)
251 {
252 	unsigned int i;
253 
254 	// if devices_not_to_watch is NULL then we care about all
255 	// events from all devices
256 	if (watcher->devices_not_to_watch == NULL) {
257 		return true;
258 	}
259 
260 	for (i = 0; i < watcher->num_devices; i++) {
261 		if (dev == watcher->devices_not_to_watch[i]) {
262 			// found a match! that means we do not
263 			// want events from this device.
264 			return false;
265 		}
266 	}
267 
268 	// if we're here it's not in the devices_not_to_watch[]
269 	// list so that means we do care about it
270 	return true;
271 }
272 
273 
274 int
need_fsevent(int type,vnode_t vp)275 need_fsevent(int type, vnode_t vp)
276 {
277 	if (type >= 0 && type < FSE_MAX_EVENTS && fs_event_type_watchers[type] == 0) {
278 		return 0;
279 	}
280 
281 	// events in /dev aren't really interesting...
282 	if (vp->v_tag == VT_DEVFS) {
283 		return 0;
284 	}
285 
286 	return 1;
287 }
288 
289 
290 #define is_throw_away(x)  ((x) == FSE_STAT_CHANGED || (x) == FSE_CONTENT_MODIFIED)
291 
292 
293 // Ways that an event can be reused:
294 //
295 // "combined" events mean that there were two events for
296 // the same vnode or path and we're combining both events
297 // into a single event.  The primary event gets a bit that
298 // marks it as having been combined.  The secondary event
299 // is essentially dropped and the kfse structure reused.
300 //
301 // "collapsed" means that multiple events below a given
302 // directory are collapsed into a single event.  in this
303 // case, the directory that we collapse into and all of
304 // its children must be re-scanned.
305 //
306 // "recycled" means that we're completely blowing away
307 // the event since there are other events that have info
308 // about the same vnode or path (and one of those other
309 // events will be marked as combined or collapsed as
310 // appropriate).
311 //
312 #define KFSE_COMBINED   0x0001
313 #define KFSE_COLLAPSED  0x0002
314 #define KFSE_RECYCLED   0x0004
315 
316 int num_dropped         = 0;
317 int num_parent_switch   = 0;
318 int num_recycled_rename = 0;
319 
320 static struct timeval last_print;
321 
322 //
323 // These variables are used to track coalescing multiple identical
324 // events for the same vnode/pathname.  If we get the same event
325 // type and same vnode/pathname as the previous event, we just drop
326 // the event since it's superfluous.  This improves some micro-
327 // benchmarks considerably and actually has a real-world impact on
328 // tests like a Finder copy where multiple stat-changed events can
329 // get coalesced.
330 //
331 static int     last_event_type = -1;
332 static void   *last_ptr = NULL;
333 static char    last_str[MAXPATHLEN];
334 static int     last_nlen = 0;
335 static int     last_vid = -1;
336 static uint64_t last_coalesced_time = 0;
337 static void   *last_event_ptr = NULL;
338 static pid_t last_pid = -1;
339 int            last_coalesced = 0;
340 static mach_timebase_info_data_t    sTimebaseInfo = { 0, 0 };
341 
342 #define MAX_HARDLINK_NOTIFICATIONS 128
343 
344 int
add_fsevent(int type,vfs_context_t ctx,...)345 add_fsevent(int type, vfs_context_t ctx, ...)
346 {
347 	struct proc      *p = vfs_context_proc(ctx);
348 	int               i, arg_type, ret;
349 	kfs_event        *kfse, *kfse_dest = NULL, *cur;
350 	fs_event_watcher *watcher;
351 	va_list           ap;
352 	int               error = 0, did_alloc = 0;
353 	int64_t           orig_linkcount = -1;
354 	dev_t             dev = 0;
355 	uint64_t          now, elapsed;
356 	uint64_t          orig_linkid = 0, next_linkid = 0;
357 	char             *pathbuff = NULL, *path_override = NULL;
358 	vnode_t           link_vp = NULL;
359 	int               pathbuff_len;
360 	uthread_t         ut = get_bsdthread_info(current_thread());
361 	bool              do_all_links = true;
362 
363 	if (type == FSE_CONTENT_MODIFIED_NO_HLINK) {
364 		do_all_links = false;
365 		type = FSE_CONTENT_MODIFIED;
366 	}
367 
368 
369 restart:
370 	va_start(ap, ctx);
371 
372 	// ignore bogus event types..
373 	if (type < 0 || type >= FSE_MAX_EVENTS) {
374 		return EINVAL;
375 	}
376 
377 	// if no one cares about this type of event, bail out
378 	if (fs_event_type_watchers[type] == 0) {
379 		va_end(ap);
380 
381 		return 0;
382 	}
383 
384 	now = mach_absolute_time();
385 
386 	// find a free event and snag it for our use
387 	// NOTE: do not do anything that would block until
388 	//       the lock is dropped.
389 	lock_fs_event_list();
390 
391 	//
392 	// check if this event is identical to the previous one...
393 	// (as long as it's not an event type that can never be the
394 	// same as a previous event)
395 	//
396 	if (path_override == NULL && type != FSE_CREATE_FILE && type != FSE_DELETE && type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CHOWN && type != FSE_DOCID_CHANGED && type != FSE_DOCID_CREATED && type != FSE_CLONE) {
397 		void *ptr = NULL;
398 		int   vid = 0, was_str = 0, nlen = 0;
399 
400 		for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
401 			switch (arg_type) {
402 			case FSE_ARG_VNODE: {
403 				ptr = va_arg(ap, void *);
404 				vid = vnode_vid((struct vnode *)ptr);
405 				last_str[0] = '\0';
406 				break;
407 			}
408 			case FSE_ARG_STRING: {
409 				nlen = va_arg(ap, int32_t);
410 				ptr = va_arg(ap, void *);
411 				was_str = 1;
412 				break;
413 			}
414 			}
415 			if (ptr != NULL) {
416 				break;
417 			}
418 		}
419 
420 		if (sTimebaseInfo.denom == 0) {
421 			(void) clock_timebase_info(&sTimebaseInfo);
422 		}
423 
424 		elapsed = (now - last_coalesced_time);
425 		if (sTimebaseInfo.denom != sTimebaseInfo.numer) {
426 			if (sTimebaseInfo.denom == 1) {
427 				elapsed *= sTimebaseInfo.numer;
428 			} else {
429 				// this could overflow... the worst that will happen is that we'll
430 				// send (or not send) an extra event so I'm not going to worry about
431 				// doing the math right like dtrace_abs_to_nano() does.
432 				elapsed = (elapsed * sTimebaseInfo.numer) / (uint64_t)sTimebaseInfo.denom;
433 			}
434 		}
435 
436 		if (type == last_event_type
437 		    && (elapsed < 1000000000)
438 		    && (last_pid == proc_getpid(p))
439 		    &&
440 		    ((vid && vid == last_vid && last_ptr == ptr)
441 		    ||
442 		    (last_str[0] && last_nlen == nlen && ptr && strcmp(last_str, ptr) == 0))
443 		    ) {
444 			last_coalesced++;
445 			unlock_fs_event_list();
446 			va_end(ap);
447 
448 			return 0;
449 		} else {
450 			last_ptr = ptr;
451 			if (was_str) {
452 				strlcpy(last_str, ptr, sizeof(last_str));
453 			}
454 			last_nlen = nlen;
455 			last_vid = vid;
456 			last_event_type = type;
457 			last_coalesced_time = now;
458 			last_pid = proc_getpid(p);
459 		}
460 	}
461 	va_start(ap, ctx);
462 
463 
464 	kfse = zalloc_noblock(event_zone);
465 	if (kfse && (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE)) {
466 		kfse_dest = zalloc_noblock(event_zone);
467 		if (kfse_dest == NULL) {
468 			did_alloc = 1;
469 			zfree(event_zone, kfse);
470 			kfse = NULL;
471 		}
472 	}
473 
474 
475 	if (kfse == NULL) {    // yikes! no free events
476 		unlock_fs_event_list();
477 		lock_watch_table();
478 
479 		for (i = 0; i < MAX_WATCHERS; i++) {
480 			watcher = watcher_table[i];
481 			if (watcher == NULL) {
482 				continue;
483 			}
484 
485 			watcher->flags |= WATCHER_DROPPED_EVENTS;
486 			fsevents_wakeup(watcher);
487 		}
488 		unlock_watch_table();
489 
490 		{
491 			struct timeval current_tv;
492 
493 			num_dropped++;
494 
495 			// only print a message at most once every 5 seconds
496 			microuptime(&current_tv);
497 			if ((current_tv.tv_sec - last_print.tv_sec) > 10) {
498 				int ii;
499 				void *junkptr = zalloc_noblock(event_zone), *listhead = kfse_list_head.lh_first;
500 
501 				printf("add_fsevent: event queue is full! dropping events (num dropped events: %d; num events outstanding: %d).\n", num_dropped, num_events_outstanding);
502 				printf("add_fsevent: kfse_list head %p ; num_pending_rename %d\n", listhead, num_pending_rename);
503 				printf("add_fsevent: zalloc sez: %p\n", junkptr);
504 				printf("add_fsevent: event_zone info: %d 0x%x\n", ((int *)event_zone)[0], ((int *)event_zone)[1]);
505 				lock_watch_table();
506 				for (ii = 0; ii < MAX_WATCHERS; ii++) {
507 					if (watcher_table[ii] == NULL) {
508 						continue;
509 					}
510 
511 					printf("add_fsevent: watcher %s %p: rd %4d wr %4d q_size %4d flags 0x%x\n",
512 					    watcher_table[ii]->proc_name,
513 					    watcher_table[ii],
514 					    watcher_table[ii]->rd, watcher_table[ii]->wr,
515 					    watcher_table[ii]->eventq_size, watcher_table[ii]->flags);
516 				}
517 				unlock_watch_table();
518 
519 				last_print = current_tv;
520 				if (junkptr) {
521 					zfree(event_zone, junkptr);
522 				}
523 			}
524 		}
525 
526 		if (pathbuff) {
527 			release_pathbuff(pathbuff);
528 			pathbuff = NULL;
529 		}
530 		return ENOSPC;
531 	}
532 
533 	memset(kfse, 0, sizeof(kfs_event));
534 	kfse->refcount = 1;
535 	OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse->flags);
536 
537 	last_event_ptr = kfse;
538 	kfse->type     = (int16_t)type;
539 	kfse->abstime  = now;
540 	kfse->pid      = proc_getpid(p);
541 	if (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE) {
542 		memset(kfse_dest, 0, sizeof(kfs_event));
543 		kfse_dest->refcount = 1;
544 		OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse_dest->flags);
545 		kfse_dest->type     = (int16_t)type;
546 		kfse_dest->pid      = proc_getpid(p);
547 		kfse_dest->abstime  = now;
548 
549 		kfse->dest = kfse_dest;
550 	}
551 
552 	num_events_outstanding++;
553 	if (kfse->type == FSE_RENAME) {
554 		num_pending_rename++;
555 	}
556 	LIST_INSERT_HEAD(&kfse_list_head, kfse, kevent_list);
557 
558 	if (kfse->refcount < 1) {
559 		panic("add_fsevent: line %d: kfse recount %d but should be at least 1", __LINE__, kfse->refcount);
560 	}
561 
562 	unlock_fs_event_list(); // at this point it's safe to unlock
563 
564 	//
565 	// now process the arguments passed in and copy them into
566 	// the kfse
567 	//
568 
569 	cur = kfse;
570 
571 	if (type == FSE_DOCID_CREATED || type == FSE_DOCID_CHANGED) {
572 		uint64_t val;
573 
574 		//
575 		// These events are special and not like the other events.  They only
576 		// have a dev_t, src inode #, dest inode #, and a doc-id.  We use the
577 		// fields that we can in the kfse but have to overlay the dest inode
578 		// number and the doc-id on the other fields.
579 		//
580 
581 		// First the dev_t
582 		arg_type = va_arg(ap, int32_t);
583 		if (arg_type == FSE_ARG_DEV) {
584 			cur->dev = (dev_t)(va_arg(ap, dev_t));
585 		} else {
586 			cur->dev = (dev_t)0xbadc0de1;
587 		}
588 
589 		// next the source inode #
590 		arg_type = va_arg(ap, int32_t);
591 		if (arg_type == FSE_ARG_INO) {
592 			cur->ino = (ino64_t)(va_arg(ap, ino64_t));
593 		} else {
594 			cur->ino = 0xbadc0de2;
595 		}
596 
597 		// now the dest inode #
598 		arg_type = va_arg(ap, int32_t);
599 		if (arg_type == FSE_ARG_INO) {
600 			val = (ino64_t)(va_arg(ap, ino64_t));
601 		} else {
602 			val = 0xbadc0de2;
603 		}
604 		// overlay the dest inode number on the str/dest pointer fields
605 		__nochk_memcpy(&cur->str, &val, sizeof(ino64_t));
606 
607 
608 		// and last the document-id
609 		arg_type = va_arg(ap, int32_t);
610 		if (arg_type == FSE_ARG_INT32) {
611 			val = (uint64_t)va_arg(ap, uint32_t);
612 		} else if (arg_type == FSE_ARG_INT64) {
613 			val = (uint64_t)va_arg(ap, uint64_t);
614 		} else {
615 			val = 0xbadc0de3;
616 		}
617 
618 		// the docid is 64-bit and overlays the uid/gid fields
619 		static_assert(sizeof(cur->uid) + sizeof(cur->gid) == sizeof(val), "gid/uid size mismatch");
620 		static_assert(offsetof(struct kfs_event, gid) - offsetof(struct kfs_event, uid) == sizeof(cur->uid), "unexpected struct kfs_event layout");
621 		memcpy(&cur->uid, &val, sizeof(cur->uid));
622 		memcpy(&cur->gid, (u_int8_t *)&val + sizeof(cur->uid), sizeof(cur->gid));
623 
624 		goto done_with_args;
625 	}
626 
627 	if (type == FSE_UNMOUNT_PENDING) {
628 		// Just a dev_t
629 		arg_type = va_arg(ap, int32_t);
630 		if (arg_type == FSE_ARG_DEV) {
631 			cur->dev = (dev_t)(va_arg(ap, dev_t));
632 		} else {
633 			cur->dev = (dev_t)0xbadc0de1;
634 		}
635 
636 		goto done_with_args;
637 	}
638 
639 	for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
640 		switch (arg_type) {
641 		case FSE_ARG_VNODE: {
642 			// this expands out into multiple arguments to the client
643 			struct vnode *vp;
644 			struct vnode_attr va;
645 
646 			if (kfse->str != NULL) {
647 				cur = kfse_dest;
648 			}
649 
650 			vp = va_arg(ap, struct vnode *);
651 			if (vp == NULL) {
652 				panic("add_fsevent: you can't pass me a NULL vnode ptr (type %d)!",
653 				    cur->type);
654 			}
655 
656 			VATTR_INIT(&va);
657 			VATTR_WANTED(&va, va_fsid);
658 			VATTR_WANTED(&va, va_fileid);
659 			VATTR_WANTED(&va, va_mode);
660 			VATTR_WANTED(&va, va_uid);
661 			VATTR_WANTED(&va, va_gid);
662 			VATTR_WANTED(&va, va_nlink);
663 			if ((ret = vnode_getattr(vp, &va, vfs_context_kernel())) != 0) {
664 				// printf("add_fsevent: failed to getattr on vp %p (%d)\n", cur->fref.vp, ret);
665 				cur->str = NULL;
666 				error = EINVAL;
667 				goto clean_up;
668 			}
669 
670 			cur->dev  = dev = (dev_t)va.va_fsid;
671 			cur->ino  = (ino64_t)va.va_fileid;
672 			cur->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | va.va_mode;
673 			cur->uid  = va.va_uid;
674 			cur->gid  = va.va_gid;
675 			if (vp->v_flag & VISHARDLINK) {
676 				cur->mode |= FSE_MODE_HLINK;
677 				if ((vp->v_type == VDIR && va.va_dirlinkcount == 0) || (vp->v_type == VREG && va.va_nlink == 0)) {
678 					cur->mode |= FSE_MODE_LAST_HLINK;
679 				}
680 				if (orig_linkid == 0) {
681 					orig_linkid = cur->ino;
682 					orig_linkcount = MIN(va.va_nlink, MAX_HARDLINK_NOTIFICATIONS);
683 					link_vp = vp;
684 				}
685 			}
686 
687 			// if we haven't gotten the path yet, get it.
688 			if (pathbuff == NULL && path_override == NULL) {
689 				pathbuff = get_pathbuff();
690 				pathbuff_len = MAXPATHLEN;
691 
692 				pathbuff[0] = '\0';
693 				if ((ret = vn_getpath_no_firmlink(vp, pathbuff, &pathbuff_len)) != 0 || pathbuff[0] == '\0') {
694 					cur->flags |= KFSE_CONTAINS_DROPPED_EVENTS;
695 
696 					do {
697 						if (vp->v_parent != NULL) {
698 							vp = vp->v_parent;
699 						} else if (vp->v_mount) {
700 							strlcpy(pathbuff, vp->v_mount->mnt_vfsstat.f_mntonname, MAXPATHLEN);
701 							break;
702 						} else {
703 							vp = NULL;
704 						}
705 
706 						if (vp == NULL) {
707 							break;
708 						}
709 
710 						pathbuff_len = MAXPATHLEN;
711 						ret = vn_getpath_no_firmlink(vp, pathbuff, &pathbuff_len);
712 					} while (ret == ENOSPC);
713 
714 					if (ret != 0 || vp == NULL) {
715 						error = ENOENT;
716 						goto clean_up;
717 					}
718 				}
719 			} else if (path_override) {
720 				pathbuff = path_override;
721 				pathbuff_len = (int)strlen(path_override) + 1;
722 			} else {
723 				strlcpy(pathbuff, "NOPATH", MAXPATHLEN);
724 				pathbuff_len = (int)strlen(pathbuff) + 1;
725 			}
726 
727 			// store the path by adding it to the global string table
728 			cur->len = (u_int16_t)pathbuff_len;
729 			cur->str = vfs_addname(pathbuff, pathbuff_len, 0, 0);
730 			if (cur->str == NULL || cur->str[0] == '\0') {
731 				panic("add_fsevent: was not able to add path %s to event %p.", pathbuff, cur);
732 			}
733 
734 			if (pathbuff != path_override) {
735 				release_pathbuff(pathbuff);
736 			}
737 			pathbuff = NULL;
738 
739 			break;
740 		}
741 
742 		case FSE_ARG_FINFO: {
743 			fse_info *fse;
744 
745 			fse = va_arg(ap, fse_info *);
746 
747 			cur->dev  = dev = (dev_t)fse->dev;
748 			cur->ino  = (ino64_t)fse->ino;
749 			cur->mode = (int32_t)fse->mode;
750 			cur->uid  = (uid_t)fse->uid;
751 			cur->gid  = (uid_t)fse->gid;
752 			// if it's a hard-link and this is the last link, flag it
753 			if (fse->mode & FSE_MODE_HLINK) {
754 				if (fse->nlink == 0) {
755 					cur->mode |= FSE_MODE_LAST_HLINK;
756 				}
757 				if (orig_linkid == 0) {
758 					orig_linkid = cur->ino;
759 					orig_linkcount = MIN(fse->nlink, MAX_HARDLINK_NOTIFICATIONS);
760 				}
761 			}
762 			if (cur->mode & FSE_TRUNCATED_PATH) {
763 				cur->flags |= KFSE_CONTAINS_DROPPED_EVENTS;
764 				cur->mode &= ~FSE_TRUNCATED_PATH;
765 			}
766 			break;
767 		}
768 
769 		case FSE_ARG_STRING:
770 			if (kfse->str != NULL) {
771 				cur = kfse_dest;
772 			}
773 
774 			cur->len = (int16_t)(va_arg(ap, int32_t) & 0x7fff);
775 			if (cur->len >= 1) {
776 				cur->str = vfs_addname(va_arg(ap, char *), cur->len, 0, 0);
777 			} else {
778 				printf("add_fsevent: funny looking string length: %d\n", (int)cur->len);
779 				cur->len = 2;
780 				cur->str = vfs_addname("/", cur->len, 0, 0);
781 			}
782 			if (cur->str[0] == 0) {
783 				printf("add_fsevent: bogus looking string (len %d)\n", cur->len);
784 			}
785 			break;
786 
787 		case FSE_ARG_INT32: {
788 			uint32_t ival = (uint32_t)va_arg(ap, int32_t);
789 			kfse->uid = ival;
790 			break;
791 		}
792 
793 		default:
794 			printf("add_fsevent: unknown type %d\n", arg_type);
795 			// just skip one 32-bit word and hope we sync up...
796 			(void)va_arg(ap, int32_t);
797 		}
798 	}
799 
800 done_with_args:
801 	va_end(ap);
802 
803 	OSBitAndAtomic16(~KFSE_BEING_CREATED, &kfse->flags);
804 	if (kfse_dest) {
805 		OSBitAndAtomic16(~KFSE_BEING_CREATED, &kfse_dest->flags);
806 	}
807 
808 	//
809 	// now we have to go and let everyone know that
810 	// is interested in this type of event
811 	//
812 	lock_watch_table();
813 
814 	for (i = 0; i < MAX_WATCHERS; i++) {
815 		watcher = watcher_table[i];
816 		if (watcher == NULL) {
817 			continue;
818 		}
819 
820 		if (type < watcher->num_events
821 		    && watcher->event_list[type] == FSE_REPORT
822 		    && watcher_cares_about_dev(watcher, dev)) {
823 			if (watcher_add_event(watcher, kfse) != 0) {
824 				watcher->num_dropped++;
825 				continue;
826 			}
827 		}
828 
829 		// if (kfse->refcount < 1) {
830 		//    panic("add_fsevent: line %d: kfse recount %d but should be at least 1", __LINE__, kfse->refcount);
831 		// }
832 	}
833 
834 	unlock_watch_table();
835 
836 clean_up:
837 
838 	if (pathbuff) {
839 		release_pathbuff(pathbuff);
840 		pathbuff = NULL;
841 	}
842 	// replicate events for sibling hardlinks
843 	if (do_all_links && (kfse->mode & FSE_MODE_HLINK) && !(kfse->mode & FSE_MODE_LAST_HLINK) && (type == FSE_STAT_CHANGED || type == FSE_CONTENT_MODIFIED || type == FSE_FINDER_INFO_CHANGED || type == FSE_XATTR_MODIFIED)) {
844 		if (orig_linkcount > 0 && orig_linkid != 0) {
845 #ifndef APFSIOC_NEXT_LINK
846 #define APFSIOC_NEXT_LINK  _IOWR('J', 10, uint64_t)
847 #endif
848 			if (path_override == NULL) {
849 				path_override = get_pathbuff();
850 			}
851 			if (next_linkid == 0) {
852 				next_linkid = orig_linkid;
853 			}
854 
855 			if (link_vp) {
856 				mount_t mp = NULL;
857 				vnode_t mnt_rootvp = NULL;
858 				int iret = -1;
859 
860 				mp = vnode_mount(link_vp);
861 				if (mp) {
862 					iret = VFS_ROOT(mp, &mnt_rootvp, vfs_context_kernel());
863 				}
864 
865 				if (iret == 0 && mnt_rootvp) {
866 					iret = VNOP_IOCTL(mnt_rootvp, APFSIOC_NEXT_LINK, (char *)&next_linkid, (int)0, vfs_context_kernel());
867 					vnode_put(mnt_rootvp);
868 				}
869 
870 				int32_t fsid0;
871 				int path_override_len = MAXPATHLEN;
872 
873 				// continue resolving hardlink paths if there is a valid next_linkid retrieved
874 				// file systems not supporting APFSIOC_NEXT_LINK will skip replicating events for sibling hardlinks
875 				if (iret == 0 && next_linkid != 0) {
876 					fsid0 = link_vp->v_mount->mnt_vfsstat.f_fsid.val[0];
877 					ut->uu_flag |= UT_KERN_RAGE_VNODES;
878 					if ((iret = fsgetpath_internal(ctx, fsid0, next_linkid, MAXPATHLEN, path_override, FSOPT_NOFIRMLINKPATH, &path_override_len)) == 0) {
879 						orig_linkcount--;
880 						ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
881 
882 						if (orig_linkcount >= 0) {
883 							release_event_ref(kfse);
884 							goto restart;
885 						}
886 					} else {
887 						// failed to get override path
888 						// encountered a broken link or the linkid has been deleted before retrieving the path
889 						orig_linkcount--;
890 						ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
891 
892 						if (orig_linkcount >= 0) {
893 							goto clean_up;
894 						}
895 					}
896 				}
897 			}
898 		}
899 	}
900 
901 	if (path_override) {
902 		release_pathbuff(path_override);
903 		path_override = NULL;
904 	}
905 
906 	release_event_ref(kfse);
907 
908 	return error;
909 }
910 
911 
912 static void
release_event_ref(kfs_event * kfse)913 release_event_ref(kfs_event *kfse)
914 {
915 	int old_refcount;
916 	kfs_event copy, dest_copy;
917 
918 
919 	old_refcount = OSAddAtomic(-1, &kfse->refcount);
920 	if (old_refcount > 1) {
921 		return;
922 	}
923 
924 	lock_fs_event_list();
925 	if (last_event_ptr == kfse) {
926 		last_event_ptr = NULL;
927 		last_event_type = -1;
928 		last_coalesced_time = 0;
929 	}
930 
931 	if (kfse->refcount < 0) {
932 		panic("release_event_ref: bogus kfse refcount %d", kfse->refcount);
933 	}
934 
935 	if (kfse->refcount > 0 || kfse->type == FSE_INVALID) {
936 		// This is very subtle.  Either of these conditions can
937 		// be true if an event got recycled while we were waiting
938 		// on the fs_event_list lock or the event got recycled,
939 		// delivered, _and_ free'd by someone else while we were
940 		// waiting on the fs event list lock.  In either case
941 		// we need to just unlock the list and return without
942 		// doing anything because if the refcount is > 0 then
943 		// someone else will take care of free'ing it and when
944 		// the kfse->type is invalid then someone else already
945 		// has handled free'ing the event (while we were blocked
946 		// on the event list lock).
947 		//
948 		unlock_fs_event_list();
949 		return;
950 	}
951 
952 	//
953 	// make a copy of this so we can free things without
954 	// holding the fs_event_buf lock
955 	//
956 	copy = *kfse;
957 	if (kfse->type != FSE_DOCID_CREATED && kfse->type != FSE_DOCID_CHANGED && kfse->dest && OSAddAtomic(-1, &kfse->dest->refcount) == 1) {
958 		dest_copy = *kfse->dest;
959 	} else {
960 		dest_copy.str  = NULL;
961 		dest_copy.len  = 0;
962 		dest_copy.type = FSE_INVALID;
963 	}
964 
965 	kfse->pid = kfse->type;         // save this off for debugging...
966 	kfse->uid = (uid_t)(long)kfse->str;   // save this off for debugging...
967 	kfse->gid = (gid_t)(long)current_thread();
968 
969 	kfse->str = (char *)0xdeadbeef;         // XXXdbg - catch any cheaters...
970 
971 	if (dest_copy.type != FSE_INVALID) {
972 		kfse->dest->str = (char *)0xbadc0de; // XXXdbg - catch any cheaters...
973 		kfse->dest->type = FSE_INVALID;
974 
975 		if (kfse->dest->kevent_list.le_prev != NULL) {
976 			num_events_outstanding--;
977 			LIST_REMOVE(kfse->dest, kevent_list);
978 			memset(&kfse->dest->kevent_list, 0xa5, sizeof(kfse->dest->kevent_list));
979 		}
980 
981 		zfree(event_zone, kfse->dest);
982 	}
983 
984 	// mark this fsevent as invalid
985 	{
986 		int otype;
987 
988 		otype = kfse->type;
989 		kfse->type = FSE_INVALID;
990 
991 		if (kfse->kevent_list.le_prev != NULL) {
992 			num_events_outstanding--;
993 			if (otype == FSE_RENAME) {
994 				num_pending_rename--;
995 			}
996 			LIST_REMOVE(kfse, kevent_list);
997 			memset(&kfse->kevent_list, 0, sizeof(kfse->kevent_list));
998 		}
999 	}
1000 
1001 	zfree(event_zone, kfse);
1002 
1003 	unlock_fs_event_list();
1004 
1005 	// if we have a pointer in the union
1006 	if (copy.str && copy.type != FSE_DOCID_CREATED && copy.type != FSE_DOCID_CHANGED) {
1007 		if (copy.len == 0) { // and it's not a string
1008 			panic("no more fref.vp!");
1009 			// vnode_rele_ext(copy.fref.vp, O_EVTONLY, 0);
1010 		} else {        // else it's a string
1011 			vfs_removename(copy.str);
1012 		}
1013 	}
1014 
1015 	if (dest_copy.type != FSE_INVALID && dest_copy.str) {
1016 		if (dest_copy.len == 0) {
1017 			panic("no more fref.vp!");
1018 			// vnode_rele_ext(dest_copy.fref.vp, O_EVTONLY, 0);
1019 		} else {
1020 			vfs_removename(dest_copy.str);
1021 		}
1022 	}
1023 }
1024 
1025 #define FSEVENTS_WATCHER_ENTITLEMENT            \
1026 	"com.apple.private.vfs.fsevents-watcher"
1027 
1028 static int
add_watcher(int8_t * event_list,int32_t num_events,int32_t eventq_size,fs_event_watcher ** watcher_out,void * fseh)1029 add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_event_watcher **watcher_out, void *fseh)
1030 {
1031 	int               i;
1032 	fs_event_watcher *watcher;
1033 
1034 	if (eventq_size <= 0 || eventq_size > 100 * max_kfs_events) {
1035 		eventq_size = max_kfs_events;
1036 	}
1037 
1038 	// Note: the event_queue follows the fs_event_watcher struct
1039 	//       in memory so we only have to do one allocation
1040 	watcher = kalloc_type(fs_event_watcher, kfs_event *, eventq_size, Z_WAITOK);
1041 	if (watcher == NULL) {
1042 		return ENOMEM;
1043 	}
1044 
1045 	watcher->event_list   = event_list;
1046 	watcher->num_events   = num_events;
1047 	watcher->devices_not_to_watch = NULL;
1048 	watcher->num_devices  = 0;
1049 	watcher->flags        = 0;
1050 	watcher->event_queue  = (kfs_event **)&watcher[1];
1051 	watcher->eventq_size  = eventq_size;
1052 	watcher->rd           = 0;
1053 	watcher->wr           = 0;
1054 	watcher->blockers     = 0;
1055 	watcher->num_readers  = 0;
1056 	watcher->max_event_id = 0;
1057 	watcher->fseh         = fseh;
1058 	watcher->pid          = proc_selfpid();
1059 	proc_selfname(watcher->proc_name, sizeof(watcher->proc_name));
1060 
1061 	watcher->num_dropped  = 0;  // XXXdbg - debugging
1062 
1063 	if (IOTaskHasEntitlement(current_task(),
1064 	    FSEVENTS_WATCHER_ENTITLEMENT)) {
1065 		watcher->flags |= WATCHER_APPLE_SYSTEM_SERVICE;
1066 	} else if (!strncmp(watcher->proc_name, "fseventsd", sizeof(watcher->proc_name)) ||
1067 	    !strncmp(watcher->proc_name, "coreservicesd", sizeof(watcher->proc_name)) ||
1068 	    !strncmp(watcher->proc_name, "revisiond", sizeof(watcher->proc_name)) ||
1069 	    !strncmp(watcher->proc_name, "mds", sizeof(watcher->proc_name))) {
1070 		printf("fsevents: watcher %s (pid: %d) needs '%s' entitlement\n",
1071 		    watcher->proc_name, watcher->pid,
1072 		    FSEVENTS_WATCHER_ENTITLEMENT);
1073 		watcher->flags |= WATCHER_APPLE_SYSTEM_SERVICE;
1074 	} else {
1075 		printf("fsevents: watcher %s (pid: %d) - Using /dev/fsevents directly is unsupported.  Migrate to FSEventsFramework\n",
1076 		    watcher->proc_name, watcher->pid);
1077 	}
1078 
1079 	lock_watch_table();
1080 
1081 	// find a slot for the new watcher
1082 	for (i = 0; i < MAX_WATCHERS; i++) {
1083 		if (watcher_table[i] == NULL) {
1084 			watcher->my_id   = i;
1085 			watcher_table[i] = watcher;
1086 			break;
1087 		}
1088 	}
1089 
1090 	if (i >= MAX_WATCHERS) {
1091 		printf("fsevents: too many watchers!\n");
1092 		unlock_watch_table();
1093 		kfree_type(fs_event_watcher, kfs_event *, watcher->eventq_size, watcher);
1094 		return ENOSPC;
1095 	}
1096 
1097 	// now update the global list of who's interested in
1098 	// events of a particular type...
1099 	for (i = 0; i < num_events; i++) {
1100 		if (event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1101 			fs_event_type_watchers[i]++;
1102 		}
1103 	}
1104 
1105 	unlock_watch_table();
1106 
1107 	*watcher_out = watcher;
1108 
1109 	return 0;
1110 }
1111 
1112 
1113 
1114 static void
remove_watcher(fs_event_watcher * target)1115 remove_watcher(fs_event_watcher *target)
1116 {
1117 	int i, j, counter = 0;
1118 	fs_event_watcher *watcher;
1119 	kfs_event *kfse;
1120 
1121 	lock_watch_table();
1122 
1123 	for (j = 0; j < MAX_WATCHERS; j++) {
1124 		watcher = watcher_table[j];
1125 		if (watcher != target) {
1126 			continue;
1127 		}
1128 
1129 		watcher_table[j] = NULL;
1130 
1131 		for (i = 0; i < watcher->num_events; i++) {
1132 			if (watcher->event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1133 				fs_event_type_watchers[i]--;
1134 			}
1135 		}
1136 
1137 		if (watcher->flags & WATCHER_CLOSING) {
1138 			unlock_watch_table();
1139 			return;
1140 		}
1141 
1142 		// printf("fsevents: removing watcher %p (rd %d wr %d num_readers %d flags 0x%x)\n", watcher, watcher->rd, watcher->wr, watcher->num_readers, watcher->flags);
1143 		watcher->flags |= WATCHER_CLOSING;
1144 		OSAddAtomic(1, &watcher->num_readers);
1145 
1146 		unlock_watch_table();
1147 
1148 		while (watcher->num_readers > 1 && counter++ < 5000) {
1149 			lock_watch_table();
1150 			fsevents_wakeup(watcher); // in case they're asleep
1151 			unlock_watch_table();
1152 
1153 			tsleep(watcher, PRIBIO, "fsevents-close", 1);
1154 		}
1155 		if (counter++ >= 5000) {
1156 			// printf("fsevents: close: still have readers! (%d)\n", watcher->num_readers);
1157 			panic("fsevents: close: still have readers! (%d)", watcher->num_readers);
1158 		}
1159 
1160 		// drain the event_queue
1161 
1162 		lck_rw_lock_exclusive(&event_handling_lock);
1163 		while (watcher->rd != watcher->wr) {
1164 			kfse = watcher->event_queue[watcher->rd];
1165 			watcher->event_queue[watcher->rd] = NULL;
1166 			watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1167 			OSSynchronizeIO();
1168 			if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1169 				release_event_ref(kfse);
1170 			}
1171 		}
1172 		lck_rw_unlock_exclusive(&event_handling_lock);
1173 
1174 		kfree_data(watcher->event_list, watcher->num_events * sizeof(int8_t));
1175 		kfree_data(watcher->devices_not_to_watch, watcher->num_devices * sizeof(dev_t));
1176 		kfree_type(fs_event_watcher, kfs_event *, watcher->eventq_size, watcher);
1177 		return;
1178 	}
1179 
1180 	unlock_watch_table();
1181 }
1182 
1183 
1184 #define EVENT_DELAY_IN_MS   10
1185 static thread_call_t event_delivery_timer = NULL;
1186 static int timer_set = 0;
1187 
1188 
1189 static void
delayed_event_delivery(__unused void * param0,__unused void * param1)1190 delayed_event_delivery(__unused void *param0, __unused void *param1)
1191 {
1192 	int i;
1193 
1194 	lock_watch_table();
1195 
1196 	for (i = 0; i < MAX_WATCHERS; i++) {
1197 		if (watcher_table[i] != NULL && watcher_table[i]->rd != watcher_table[i]->wr) {
1198 			fsevents_wakeup(watcher_table[i]);
1199 		}
1200 	}
1201 
1202 	timer_set = 0;
1203 
1204 	unlock_watch_table();
1205 }
1206 
1207 
1208 //
1209 // The watch table must be locked before calling this function.
1210 //
1211 static void
schedule_event_wakeup(void)1212 schedule_event_wakeup(void)
1213 {
1214 	uint64_t deadline;
1215 
1216 	if (event_delivery_timer == NULL) {
1217 		event_delivery_timer = thread_call_allocate((thread_call_func_t)delayed_event_delivery, NULL);
1218 	}
1219 
1220 	clock_interval_to_deadline(EVENT_DELAY_IN_MS, 1000 * 1000, &deadline);
1221 
1222 	thread_call_enter_delayed(event_delivery_timer, deadline);
1223 	timer_set = 1;
1224 }
1225 
1226 
1227 
1228 #define MAX_NUM_PENDING  16
1229 
1230 //
1231 // NOTE: the watch table must be locked before calling
1232 //       this routine.
1233 //
1234 static int
watcher_add_event(fs_event_watcher * watcher,kfs_event * kfse)1235 watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse)
1236 {
1237 	if (kfse->abstime > watcher->max_event_id) {
1238 		watcher->max_event_id = kfse->abstime;
1239 	}
1240 
1241 	if (((watcher->wr + 1) % watcher->eventq_size) == watcher->rd) {
1242 		watcher->flags |= WATCHER_DROPPED_EVENTS;
1243 		fsevents_wakeup(watcher);
1244 		return ENOSPC;
1245 	}
1246 
1247 	OSAddAtomic(1, &kfse->refcount);
1248 	watcher->event_queue[watcher->wr] = kfse;
1249 	OSSynchronizeIO();
1250 	watcher->wr = (watcher->wr + 1) % watcher->eventq_size;
1251 
1252 	//
1253 	// wake up the watcher if there are more than MAX_NUM_PENDING events.
1254 	// otherwise schedule a timer (if one isn't already set) which will
1255 	// send any pending events if no more are received in the next
1256 	// EVENT_DELAY_IN_MS milli-seconds.
1257 	//
1258 	int32_t num_pending = 0;
1259 	if (watcher->rd < watcher->wr) {
1260 		num_pending = watcher->wr - watcher->rd;
1261 	}
1262 
1263 	if (watcher->rd > watcher->wr) {
1264 		num_pending = watcher->wr + watcher->eventq_size - watcher->rd;
1265 	}
1266 
1267 	if (num_pending > (watcher->eventq_size * 3 / 4) && !(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE)) {
1268 		/* Non-Apple Service is falling behind, start dropping events for this process */
1269 		lck_rw_lock_exclusive(&event_handling_lock);
1270 		while (watcher->rd != watcher->wr) {
1271 			kfse = watcher->event_queue[watcher->rd];
1272 			watcher->event_queue[watcher->rd] = NULL;
1273 			watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1274 			OSSynchronizeIO();
1275 			if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1276 				release_event_ref(kfse);
1277 			}
1278 		}
1279 		watcher->flags |= WATCHER_DROPPED_EVENTS;
1280 		lck_rw_unlock_exclusive(&event_handling_lock);
1281 
1282 		printf("fsevents: watcher falling behind: %s (pid: %d) rd: %4d wr: %4d q_size: %4d flags: 0x%x\n",
1283 		    watcher->proc_name, watcher->pid, watcher->rd, watcher->wr,
1284 		    watcher->eventq_size, watcher->flags);
1285 
1286 		fsevents_wakeup(watcher);
1287 	} else if (num_pending > MAX_NUM_PENDING) {
1288 		fsevents_wakeup(watcher);
1289 	} else if (timer_set == 0) {
1290 		schedule_event_wakeup();
1291 	}
1292 
1293 	return 0;
1294 }
1295 
//
// Push the `*pending' bytes staged in `buff' out to `uio'.
// Returns ENOSPC without copying if they would not fit in what is
// left of the uio, or the uiomove() error.  `*pending' is reset to
// zero only when the copy succeeded.
//
static int
fill_buff_flush(char *buff, int32_t *pending, struct uio *uio)
{
	int error;

	if (*pending > uio_resid(uio)) {
		return ENOSPC;
	}

	error = uiomove(buff, *pending, uio);
	if (error == 0) {
		*pending = 0;
	}
	return error;
}

//
// Append one argument record to the staging buffer: a uint16_t
// type, a uint16_t size (the low 16 bits of `size') and then `size'
// bytes of `data'.  Whenever the buffer cannot hold what comes next
// it is flushed to `uio'.
//
// `*_buff_idx' is the number of bytes currently staged in `buff'
// (capacity `buff_sz'); it is updated on every return path.
// Returns 0, ENOSPC if a flush would not fit in the uio, or the
// error from uiomove().
//
static int
fill_buff(uint16_t type, int32_t size, const void *data,
    char *buff, int32_t *_buff_idx, int32_t buff_sz,
    struct uio *uio)
{
	const char *src = (const char *)data;
	int32_t     used = *_buff_idx;
	int32_t     chunk;
	int32_t     error = 0;
	uint16_t    size16;

	//
	// make room for the header plus one byte; the extra byte
	// guarantees the copy loop below always moves at least 1 byte
	//
	if ((buff_sz - used) <= (int)(2 * sizeof(uint16_t) + 1)) {
		error = fill_buff_flush(buff, &used, uio);
	}

	if (error == 0) {
		// header: type, then size
		memcpy(&buff[used], &type, sizeof(uint16_t));
		used += sizeof(uint16_t);

		size16 = size & 0xffff;
		memcpy(&buff[used], &size16, sizeof(uint16_t));
		used += sizeof(uint16_t);
	}

	// body: copy as much as fits, flushing whenever the rest
	// will not fit in the space that remains
	while (error == 0 && size > 0) {
		chunk = buff_sz - used;
		if (size < chunk) {
			chunk = size;
		}
		memcpy(&buff[used], src, chunk);

		size -= chunk;
		used += chunk;
		src  += chunk;

		if (size > (buff_sz - used)) {
			error = fill_buff_flush(buff, &used, uio);
		}

		if (chunk == 0) { // just in case...
			break;
		}
	}

	*_buff_idx = used;

	return error;
}
1361 
1362 
1363 static int copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio)  __attribute__((noinline));
1364 
1365 static int
copy_out_kfse(fs_event_watcher * watcher,kfs_event * kfse,struct uio * uio)1366 copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio)
1367 {
1368 	int      error;
1369 	uint16_t tmp16;
1370 	int32_t  type;
1371 	kfs_event *cur;
1372 	char     evbuff[512];
1373 	int      evbuff_idx = 0;
1374 
1375 	if (kfse->type == FSE_INVALID) {
1376 		panic("fsevents: copy_out_kfse: asked to copy out an invalid event (kfse %p, refcount %d fref ptr %p)", kfse, kfse->refcount, kfse->str);
1377 	}
1378 
1379 	if (kfse->flags & KFSE_BEING_CREATED) {
1380 		return 0;
1381 	}
1382 
1383 	if (((kfse->type == FSE_RENAME) || (kfse->type == FSE_CLONE)) && kfse->dest == NULL) {
1384 		//
1385 		// This can happen if an event gets recycled but we had a
1386 		// pointer to it in our event queue.  The event is the
1387 		// destination of a rename or clone which we'll process separately
1388 		// (that is, another kfse points to this one so it's ok
1389 		// to skip this guy because we'll process it when we process
1390 		// the other one)
1391 		error = 0;
1392 		goto get_out;
1393 	}
1394 
1395 	if (watcher->flags & WATCHER_WANTS_EXTENDED_INFO) {
1396 		type = (kfse->type & 0xfff);
1397 
1398 		if (kfse->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1399 			type |= (FSE_CONTAINS_DROPPED_EVENTS << FSE_FLAG_SHIFT);
1400 		} else if (kfse->flags & KFSE_COMBINED_EVENTS) {
1401 			type |= (FSE_COMBINED_EVENTS << FSE_FLAG_SHIFT);
1402 		}
1403 	} else {
1404 		type = (int32_t)kfse->type;
1405 	}
1406 
1407 	// copy out the type of the event
1408 	memcpy(evbuff, &type, sizeof(int32_t));
1409 	evbuff_idx += sizeof(int32_t);
1410 
1411 	// copy out the pid of the person that generated the event
1412 	memcpy(&evbuff[evbuff_idx], &kfse->pid, sizeof(pid_t));
1413 	evbuff_idx += sizeof(pid_t);
1414 
1415 	cur = kfse;
1416 
1417 copy_again:
1418 
1419 	if (kfse->type == FSE_DOCID_CHANGED || kfse->type == FSE_DOCID_CREATED) {
1420 		dev_t    dev  = cur->dev;
1421 		ino64_t    ino  = cur->ino;
1422 		uint64_t ival;
1423 
1424 		error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1425 		if (error != 0) {
1426 			goto get_out;
1427 		}
1428 
1429 		error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1430 		if (error != 0) {
1431 			goto get_out;
1432 		}
1433 
1434 		memcpy(&ino, &cur->str, sizeof(ino64_t));
1435 		error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1436 		if (error != 0) {
1437 			goto get_out;
1438 		}
1439 
1440 		memcpy(&ival, &cur->uid, sizeof(uint64_t)); // the docid gets stuffed into the ino field
1441 		error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &ival, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1442 		if (error != 0) {
1443 			goto get_out;
1444 		}
1445 
1446 		goto done;
1447 	}
1448 
1449 	if (kfse->type == FSE_UNMOUNT_PENDING) {
1450 		dev_t    dev  = cur->dev;
1451 
1452 		error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1453 		if (error != 0) {
1454 			goto get_out;
1455 		}
1456 
1457 		goto done;
1458 	}
1459 
1460 	if (cur->str == NULL || cur->str[0] == '\0') {
1461 		printf("copy_out_kfse:2: empty/short path (%s)\n", cur->str);
1462 		error = fill_buff(FSE_ARG_STRING, 2, "/", evbuff, &evbuff_idx, sizeof(evbuff), uio);
1463 	} else {
1464 		error = fill_buff(FSE_ARG_STRING, cur->len, cur->str, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1465 	}
1466 	if (error != 0) {
1467 		goto get_out;
1468 	}
1469 
1470 	if (cur->dev == 0 && cur->ino == 0) {
1471 		// this happens when a rename event happens and the
1472 		// destination of the rename did not previously exist.
1473 		// it thus has no other file info so skip copying out
1474 		// the stuff below since it isn't initialized
1475 		goto done;
1476 	}
1477 
1478 
1479 	if (watcher->flags & WATCHER_WANTS_COMPACT_EVENTS) {
1480 		int32_t finfo_size;
1481 
1482 		finfo_size = sizeof(dev_t) + sizeof(ino64_t) + sizeof(int32_t) + sizeof(uid_t) + sizeof(gid_t);
1483 		error = fill_buff(FSE_ARG_FINFO, finfo_size, &cur->ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1484 		if (error != 0) {
1485 			goto get_out;
1486 		}
1487 	} else {
1488 		error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &cur->dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1489 		if (error != 0) {
1490 			goto get_out;
1491 		}
1492 
1493 		error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &cur->ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1494 		if (error != 0) {
1495 			goto get_out;
1496 		}
1497 
1498 		error = fill_buff(FSE_ARG_MODE, sizeof(int32_t), &cur->mode, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1499 		if (error != 0) {
1500 			goto get_out;
1501 		}
1502 
1503 		error = fill_buff(FSE_ARG_UID, sizeof(uid_t), &cur->uid, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1504 		if (error != 0) {
1505 			goto get_out;
1506 		}
1507 
1508 		error = fill_buff(FSE_ARG_GID, sizeof(gid_t), &cur->gid, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1509 		if (error != 0) {
1510 			goto get_out;
1511 		}
1512 	}
1513 
1514 
1515 	if (cur->dest) {
1516 		cur = cur->dest;
1517 		goto copy_again;
1518 	}
1519 
1520 done:
1521 	// very last thing: the time stamp
1522 	error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &cur->abstime, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1523 	if (error != 0) {
1524 		goto get_out;
1525 	}
1526 
1527 	// check if the FSE_ARG_DONE will fit
1528 	if (sizeof(uint16_t) > sizeof(evbuff) - evbuff_idx) {
1529 		if (evbuff_idx > uio_resid(uio)) {
1530 			error = ENOSPC;
1531 			goto get_out;
1532 		}
1533 		error = uiomove(evbuff, evbuff_idx, uio);
1534 		if (error) {
1535 			goto get_out;
1536 		}
1537 		evbuff_idx = 0;
1538 	}
1539 
1540 	tmp16 = FSE_ARG_DONE;
1541 	memcpy(&evbuff[evbuff_idx], &tmp16, sizeof(uint16_t));
1542 	evbuff_idx += sizeof(uint16_t);
1543 
1544 	// flush any remaining data in the buffer (and hopefully
1545 	// in most cases this is the only uiomove we'll do)
1546 	if (evbuff_idx > uio_resid(uio)) {
1547 		error = ENOSPC;
1548 	} else {
1549 		error = uiomove(evbuff, evbuff_idx, uio);
1550 	}
1551 
1552 get_out:
1553 
1554 	return error;
1555 }
1556 
1557 
1558 
1559 static int
fmod_watch(fs_event_watcher * watcher,struct uio * uio)1560 fmod_watch(fs_event_watcher *watcher, struct uio *uio)
1561 {
1562 	int               error = 0;
1563 	user_ssize_t      last_full_event_resid;
1564 	kfs_event        *kfse;
1565 	uint16_t          tmp16;
1566 	int               skipped;
1567 
1568 	last_full_event_resid = uio_resid(uio);
1569 
1570 	// need at least 2048 bytes of space (maxpathlen + 1 event buf)
1571 	if (uio_resid(uio) < 2048 || watcher == NULL) {
1572 		return EINVAL;
1573 	}
1574 
1575 	if (watcher->flags & WATCHER_CLOSING) {
1576 		return 0;
1577 	}
1578 
1579 	if (OSAddAtomic(1, &watcher->num_readers) != 0) {
1580 		// don't allow multiple threads to read from the fd at the same time
1581 		OSAddAtomic(-1, &watcher->num_readers);
1582 		return EAGAIN;
1583 	}
1584 
1585 restart_watch:
1586 	if (watcher->rd == watcher->wr) {
1587 		if (watcher->flags & WATCHER_CLOSING) {
1588 			OSAddAtomic(-1, &watcher->num_readers);
1589 			return 0;
1590 		}
1591 		OSAddAtomic(1, &watcher->blockers);
1592 
1593 		// there's nothing to do, go to sleep
1594 		error = tsleep((caddr_t)watcher, PUSER | PCATCH, "fsevents_empty", 0);
1595 
1596 		OSAddAtomic(-1, &watcher->blockers);
1597 
1598 		if (error != 0 || (watcher->flags & WATCHER_CLOSING)) {
1599 			OSAddAtomic(-1, &watcher->num_readers);
1600 			return error;
1601 		}
1602 	}
1603 
1604 	// if we dropped events, return that as an event first
1605 	if (watcher->flags & WATCHER_DROPPED_EVENTS) {
1606 		int32_t val = FSE_EVENTS_DROPPED;
1607 
1608 		error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
1609 		if (error == 0) {
1610 			val = 0; // a fake pid
1611 			error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
1612 
1613 			tmp16 = FSE_ARG_DONE; // makes it a consistent msg
1614 			error = uiomove((caddr_t)&tmp16, sizeof(int16_t), uio);
1615 
1616 			last_full_event_resid = uio_resid(uio);
1617 		}
1618 
1619 		if (error) {
1620 			OSAddAtomic(-1, &watcher->num_readers);
1621 			return error;
1622 		}
1623 
1624 		watcher->flags &= ~WATCHER_DROPPED_EVENTS;
1625 	}
1626 
1627 	skipped = 0;
1628 
1629 	lck_rw_lock_shared(&event_handling_lock);
1630 	while (uio_resid(uio) > 0 && watcher->rd != watcher->wr) {
1631 		if (watcher->flags & WATCHER_CLOSING) {
1632 			break;
1633 		}
1634 
1635 		//
1636 		// check if the event is something of interest to us
1637 		// (since it may have been recycled/reused and changed
1638 		// its type or which device it is for)
1639 		//
1640 		kfse = watcher->event_queue[watcher->rd];
1641 		if (!kfse || kfse->type == FSE_INVALID || kfse->type >= watcher->num_events || kfse->refcount < 1) {
1642 			break;
1643 		}
1644 
1645 		if (watcher->event_list[kfse->type] == FSE_REPORT) {
1646 			if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) &&
1647 			    kfse->type != FSE_DOCID_CREATED &&
1648 			    kfse->type != FSE_DOCID_CHANGED &&
1649 			    is_ignored_directory(kfse->str)) {
1650 				// If this is not an Apple System Service, skip specified directories
1651 				// radar://12034844
1652 				error = 0;
1653 				skipped = 1;
1654 			} else {
1655 				skipped = 0;
1656 				if (last_event_ptr == kfse) {
1657 					last_event_ptr = NULL;
1658 					last_event_type = -1;
1659 					last_coalesced_time = 0;
1660 				}
1661 				error = copy_out_kfse(watcher, kfse, uio);
1662 				if (error != 0) {
1663 					// if an event won't fit or encountered an error while
1664 					// we were copying it out, then backup to the last full
1665 					// event and just bail out.  if the error was ENOENT
1666 					// then we can continue regular processing, otherwise
1667 					// we should unlock things and return.
1668 					uio_setresid(uio, last_full_event_resid);
1669 					if (error != ENOENT) {
1670 						lck_rw_unlock_shared(&event_handling_lock);
1671 						error = 0;
1672 						goto get_out;
1673 					}
1674 				}
1675 
1676 				last_full_event_resid = uio_resid(uio);
1677 			}
1678 		}
1679 
1680 		watcher->event_queue[watcher->rd] = NULL;
1681 		watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1682 		OSSynchronizeIO();
1683 		release_event_ref(kfse);
1684 	}
1685 	lck_rw_unlock_shared(&event_handling_lock);
1686 
1687 	if (skipped && error == 0) {
1688 		goto restart_watch;
1689 	}
1690 
1691 get_out:
1692 	OSAddAtomic(-1, &watcher->num_readers);
1693 
1694 	return error;
1695 }
1696 
1697 
1698 //
1699 // Shoo watchers away from a volume that's about to be unmounted
1700 // (so that it can be cleanly unmounted).
1701 //
1702 void
fsevent_unmount(__unused struct mount * mp,__unused vfs_context_t ctx)1703 fsevent_unmount(__unused struct mount *mp, __unused vfs_context_t ctx)
1704 {
1705 #if !defined(XNU_TARGET_OS_OSX)
1706 	dev_t dev = mp->mnt_vfsstat.f_fsid.val[0];
1707 	int error, waitcount = 0;
1708 	struct timespec ts = {.tv_sec = 1, .tv_nsec = 0};
1709 
1710 	// wait for any other pending unmounts to complete
1711 	lock_watch_table();
1712 	while (fsevent_unmount_dev != 0) {
1713 		error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_wait", &ts);
1714 		if (error == EWOULDBLOCK) {
1715 			error = 0;
1716 		}
1717 		if (!error && (++waitcount >= 10)) {
1718 			error = EWOULDBLOCK;
1719 			printf("timeout waiting to signal unmount pending for dev %d (fsevent_unmount_dev %d)\n", dev, fsevent_unmount_dev);
1720 		}
1721 		if (error) {
1722 			// there's a problem, bail out
1723 			unlock_watch_table();
1724 			return;
1725 		}
1726 	}
1727 	if (fs_event_type_watchers[FSE_UNMOUNT_PENDING] == 0) {
1728 		// nobody watching for unmount pending events
1729 		unlock_watch_table();
1730 		return;
1731 	}
1732 	// this is now the current unmount pending
1733 	fsevent_unmount_dev = dev;
1734 	fsevent_unmount_ack_count = fs_event_type_watchers[FSE_UNMOUNT_PENDING];
1735 	unlock_watch_table();
1736 
1737 	// send an event to notify the watcher they need to get off the mount
1738 	error = add_fsevent(FSE_UNMOUNT_PENDING, ctx, FSE_ARG_DEV, dev, FSE_ARG_DONE);
1739 
1740 	// wait for acknowledgment(s) (give up if it takes too long)
1741 	lock_watch_table();
1742 	waitcount = 0;
1743 	while (fsevent_unmount_dev == dev) {
1744 		error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_pending", &ts);
1745 		if (error == EWOULDBLOCK) {
1746 			error = 0;
1747 		}
1748 		if (!error && (++waitcount >= 10)) {
1749 			error = EWOULDBLOCK;
1750 			printf("unmount pending ack timeout for dev %d\n", dev);
1751 		}
1752 		if (error) {
1753 			// there's a problem, bail out
1754 			if (fsevent_unmount_dev == dev) {
1755 				fsevent_unmount_dev = 0;
1756 				fsevent_unmount_ack_count = 0;
1757 			}
1758 			wakeup((caddr_t)&fsevent_unmount_dev);
1759 			break;
1760 		}
1761 	}
1762 	unlock_watch_table();
1763 #endif /* ! XNU_TARGET_OS_OSX */
1764 }
1765 
1766 
1767 //
1768 // /dev/fsevents device code
1769 //
1770 static int fsevents_installed = 0;  // NOTE(review): presumably set once the device has been set up; not referenced in this part of the file -- confirm
1771 
// Per-open state of an fsevents file.  This is what fp_get_data() /
// fg_get_data() return in the fseventsf_* file operations and what
// kn->kn_hook points at in the knote filter routines.
1772 typedef struct fsevent_handle {
1773 	UInt32            flags;    // FSEH_* bits; FSEH_CLOSING is set by fseventsf_close()
1774 	SInt32            active;   // ioctls currently in progress; fseventsf_close() waits for this to drop to 0
1775 	fs_event_watcher *watcher;  // the watcher read through this handle; cleared by fseventsf_close()
1776 	struct klist      knotes;   // knotes attached to this handle (see filt_fsevent_detach())
1777 	struct selinfo    si;       // passed to selrecord() by fseventsf_select()
1778 } fsevent_handle;
1779 
// flags bit: the handle is being closed; fseventsf_ioctl() then returns 0 without doing anything
1780 #define FSEH_CLOSING   0x0001
1781 
1782 static int
fseventsf_read(struct fileproc * fp,struct uio * uio,__unused int flags,__unused vfs_context_t ctx)1783 fseventsf_read(struct fileproc *fp, struct uio *uio,
1784     __unused int flags, __unused vfs_context_t ctx)
1785 {
1786 	fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
1787 	int error;
1788 
1789 	error = fmod_watch(fseh->watcher, uio);
1790 
1791 	return error;
1792 }
1793 
1794 
// Argument block of the device-filter ioctl, in the layout used by
// 32-bit callers (args32) and by 64-bit callers (args64).  Both are
// declared with 4-byte packing.  fseventsf_ioctl() rejects a request
// with more than 256 devices.
1795 #pragma pack(push, 4)
1796 typedef struct fsevent_dev_filter_args32 {
1797 	uint32_t            num_devices;  // number of dev_t entries at `devices'
1798 	user32_addr_t       devices;      // user address of the dev_t array that gets copied in
1799 } fsevent_dev_filter_args32;
1800 typedef struct fsevent_dev_filter_args64 {
1801 	uint32_t            num_devices;  // number of dev_t entries at `devices'
1802 	user64_addr_t       devices;      // user address of the dev_t array that gets copied in
1803 } fsevent_dev_filter_args64;
1804 #pragma pack(pop)
1805 
// The same ioctl group and number, encoded with each argument structure.
// fseventsf_ioctl() accepts the _32 form only from processes that are not
// 64-bit and the _64 form only from 64-bit processes.
1806 #define FSEVENTS_DEVICE_FILTER_32       _IOW('s', 100, fsevent_dev_filter_args32)
1807 #define FSEVENTS_DEVICE_FILTER_64       _IOW('s', 100, fsevent_dev_filter_args64)
1808 
1809 static int
fseventsf_ioctl(struct fileproc * fp,u_long cmd,caddr_t data,vfs_context_t ctx)1810 fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx)
1811 {
1812 	fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
1813 	int ret = 0;
1814 	fsevent_dev_filter_args64 *devfilt_args, _devfilt_args;
1815 
1816 	OSAddAtomic(1, &fseh->active);
1817 	if (fseh->flags & FSEH_CLOSING) {
1818 		OSAddAtomic(-1, &fseh->active);
1819 		return 0;
1820 	}
1821 
1822 	switch (cmd) {
1823 	case FIONBIO:
1824 	case FIOASYNC:
1825 		break;
1826 
1827 	case FSEVENTS_WANT_COMPACT_EVENTS: {
1828 		fseh->watcher->flags |= WATCHER_WANTS_COMPACT_EVENTS;
1829 		break;
1830 	}
1831 
1832 	case FSEVENTS_WANT_EXTENDED_INFO: {
1833 		fseh->watcher->flags |= WATCHER_WANTS_EXTENDED_INFO;
1834 		break;
1835 	}
1836 
1837 	case FSEVENTS_GET_CURRENT_ID: {
1838 		*(uint64_t *)data = fseh->watcher->max_event_id;
1839 		ret = 0;
1840 		break;
1841 	}
1842 
1843 	case FSEVENTS_DEVICE_FILTER_32: {
1844 		if (proc_is64bit(vfs_context_proc(ctx))) {
1845 			ret = EINVAL;
1846 			break;
1847 		}
1848 		fsevent_dev_filter_args32 *devfilt_args32 = (fsevent_dev_filter_args32 *)data;
1849 
1850 		devfilt_args = &_devfilt_args;
1851 		memset(devfilt_args, 0, sizeof(fsevent_dev_filter_args64));
1852 		devfilt_args->num_devices = devfilt_args32->num_devices;
1853 		devfilt_args->devices     = CAST_USER_ADDR_T(devfilt_args32->devices);
1854 		goto handle_dev_filter;
1855 	}
1856 
1857 	case FSEVENTS_DEVICE_FILTER_64:
1858 		if (!proc_is64bit(vfs_context_proc(ctx))) {
1859 			ret = EINVAL;
1860 			break;
1861 		}
1862 		devfilt_args = (fsevent_dev_filter_args64 *)data;
1863 
1864 handle_dev_filter:
1865 		{
1866 			int new_num_devices, old_num_devices = 0;
1867 			dev_t *devices_not_to_watch, *tmp = NULL;
1868 
1869 			if (devfilt_args->num_devices > 256) {
1870 				ret = EINVAL;
1871 				break;
1872 			}
1873 
1874 			new_num_devices = devfilt_args->num_devices;
1875 			if (new_num_devices == 0) {
1876 				lock_watch_table();
1877 
1878 				tmp = fseh->watcher->devices_not_to_watch;
1879 				fseh->watcher->devices_not_to_watch = NULL;
1880 				old_num_devices = fseh->watcher->num_devices;
1881 				fseh->watcher->num_devices = new_num_devices;
1882 
1883 				unlock_watch_table();
1884 				kfree_data(tmp, old_num_devices * sizeof(dev_t));
1885 				break;
1886 			}
1887 
1888 			devices_not_to_watch = kalloc_data(new_num_devices * sizeof(dev_t), Z_WAITOK);
1889 			if (devices_not_to_watch == NULL) {
1890 				ret = ENOMEM;
1891 				break;
1892 			}
1893 
1894 			ret = copyin((user_addr_t)devfilt_args->devices,
1895 			    (void *)devices_not_to_watch,
1896 			    new_num_devices * sizeof(dev_t));
1897 			if (ret) {
1898 				kfree_data(devices_not_to_watch, new_num_devices * sizeof(dev_t));
1899 				break;
1900 			}
1901 
1902 			lock_watch_table();
1903 			old_num_devices = fseh->watcher->num_devices;
1904 			fseh->watcher->num_devices = new_num_devices;
1905 			tmp = fseh->watcher->devices_not_to_watch;
1906 			fseh->watcher->devices_not_to_watch = devices_not_to_watch;
1907 			unlock_watch_table();
1908 
1909 			kfree_data(tmp, old_num_devices * sizeof(dev_t));
1910 
1911 			break;
1912 		}
1913 
1914 	case FSEVENTS_UNMOUNT_PENDING_ACK: {
1915 		lock_watch_table();
1916 		dev_t dev = *(dev_t *)data;
1917 		if (fsevent_unmount_dev == dev) {
1918 			if (--fsevent_unmount_ack_count <= 0) {
1919 				fsevent_unmount_dev = 0;
1920 				wakeup((caddr_t)&fsevent_unmount_dev);
1921 			}
1922 		} else {
1923 			printf("unexpected unmount pending ack %d (%d)\n", dev, fsevent_unmount_dev);
1924 			ret = EINVAL;
1925 		}
1926 		unlock_watch_table();
1927 		break;
1928 	}
1929 
1930 	default:
1931 		ret = EINVAL;
1932 		break;
1933 	}
1934 
1935 	OSAddAtomic(-1, &fseh->active);
1936 	return ret;
1937 }
1938 
1939 
1940 static int
fseventsf_select(struct fileproc * fp,int which,__unused void * wql,vfs_context_t ctx)1941 fseventsf_select(struct fileproc *fp, int which, __unused void *wql, vfs_context_t ctx)
1942 {
1943 	fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
1944 	int ready = 0;
1945 
1946 	if ((which != FREAD) || (fseh->watcher->flags & WATCHER_CLOSING)) {
1947 		return 0;
1948 	}
1949 
1950 
1951 	// if there's nothing in the queue, we're not ready
1952 	if (fseh->watcher->rd != fseh->watcher->wr) {
1953 		ready = 1;
1954 	}
1955 
1956 	if (!ready) {
1957 		selrecord(vfs_context_proc(ctx), &fseh->si, wql);
1958 	}
1959 
1960 	return ready;
1961 }
1962 
1963 
1964 #if NOTUSED
// stat() stub for an fsevents file: ignores all of its arguments and
// always returns ENOTSUP.  Compiled only when NOTUSED is non-zero.
1965 static int
fseventsf_stat(__unused struct fileproc * fp,__unused struct stat * sb,__unused vfs_context_t ctx)1966 fseventsf_stat(__unused struct fileproc *fp, __unused struct stat *sb, __unused vfs_context_t ctx)
1967 {
1968 	return ENOTSUP;
1969 }
1970 #endif
1971 
1972 static int
fseventsf_close(struct fileglob * fg,__unused vfs_context_t ctx)1973 fseventsf_close(struct fileglob *fg, __unused vfs_context_t ctx)
1974 {
1975 	fsevent_handle *fseh = (struct fsevent_handle *)fg_get_data(fg);
1976 	fs_event_watcher *watcher;
1977 
1978 	OSBitOrAtomic(FSEH_CLOSING, &fseh->flags);
1979 	while (OSAddAtomic(0, &fseh->active) > 0) {
1980 		tsleep((caddr_t)fseh->watcher, PRIBIO, "fsevents-close", 1);
1981 	}
1982 
1983 	watcher = fseh->watcher;
1984 	fg_set_data(fg, NULL);
1985 	fseh->watcher = NULL;
1986 
1987 	remove_watcher(watcher);
1988 	kfree_type(fsevent_handle, fseh);
1989 
1990 	return 0;
1991 }
1992 
1993 static void
filt_fsevent_detach(struct knote * kn)1994 filt_fsevent_detach(struct knote *kn)
1995 {
1996 	fsevent_handle *fseh = (struct fsevent_handle *)kn->kn_hook;
1997 
1998 	lock_watch_table();
1999 
2000 	KNOTE_DETACH(&fseh->knotes, kn);
2001 
2002 	unlock_watch_table();
2003 }
2004 
2005 /*
2006  * Determine whether this knote should be active
2007  *
2008  * This is kind of subtle.
2009  *      --First, notice if the vnode has been revoked: in so, override hint
2010  *      --EVFILT_READ knotes are checked no matter what the hint is
2011  *      --Other knotes activate based on hint.
2012  *      --If hint is revoke, set special flags and activate
2013  */
2014 static int
filt_fsevent_common(struct knote * kn,struct kevent_qos_s * kev,long hint)2015 filt_fsevent_common(struct knote *kn, struct kevent_qos_s *kev, long hint)
2016 {
2017 	fsevent_handle *fseh = (struct fsevent_handle *)kn->kn_hook;
2018 	int activate = 0;
2019 	int32_t rd, wr, amt;
2020 	int64_t data = 0;
2021 
2022 	if (NOTE_REVOKE == hint) {
2023 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2024 		activate = 1;
2025 	}
2026 
2027 	rd = fseh->watcher->rd;
2028 	wr = fseh->watcher->wr;
2029 	if (rd <= wr) {
2030 		amt = wr - rd;
2031 	} else {
2032 		amt = fseh->watcher->eventq_size - (rd - wr);
2033 	}
2034 
2035 	switch (kn->kn_filter) {
2036 	case EVFILT_READ:
2037 		data = amt;
2038 		activate = (data != 0);
2039 		break;
2040 	case EVFILT_VNODE:
2041 		/* Check events this note matches against the hint */
2042 		if (kn->kn_sfflags & hint) {
2043 			kn->kn_fflags |= hint;         /* Set which event occurred */
2044 		}
2045 		if (kn->kn_fflags != 0) {
2046 			activate = 1;
2047 		}
2048 		break;
2049 	default:
2050 		// nothing to do...
2051 		break;
2052 	}
2053 
2054 	if (activate && kev) {
2055 		knote_fill_kevent(kn, kev, data);
2056 	}
2057 	return activate;
2058 }
2059 
2060 static int
filt_fsevent(struct knote * kn,long hint)2061 filt_fsevent(struct knote *kn, long hint)
2062 {
2063 	return filt_fsevent_common(kn, NULL, hint);
2064 }
2065 
2066 static int
filt_fsevent_touch(struct knote * kn,struct kevent_qos_s * kev)2067 filt_fsevent_touch(struct knote *kn, struct kevent_qos_s *kev)
2068 {
2069 	int res;
2070 
2071 	lock_watch_table();
2072 
2073 	/* accept new fflags/data as saved */
2074 	kn->kn_sfflags = kev->fflags;
2075 	kn->kn_sdata = kev->data;
2076 
2077 	/* restrict the current results to the (smaller?) set of new interest */
2078 	/*
2079 	 * For compatibility with previous implementations, we leave kn_fflags
2080 	 * as they were before.
2081 	 */
2082 	//kn->kn_fflags &= kev->fflags;
2083 
2084 	/* determine if the filter is now fired */
2085 	res = filt_fsevent_common(kn, NULL, 0);
2086 
2087 	unlock_watch_table();
2088 
2089 	return res;
2090 }
2091 
/*
 * kqueue f_process callback: re-evaluate the knote with no hint and, if it
 * is active, fill in `kev`.  Runs under the watch-table lock.
 */
static int
filt_fsevent_process(struct knote *kn, struct kevent_qos_s *kev)
{
	int fired;

	lock_watch_table();
	fired = filt_fsevent_common(kn, kev, 0);
	unlock_watch_table();

	return fired;
}
2105 
2106 SECURITY_READ_ONLY_EARLY(struct  filterops) fsevent_filtops = {
2107 	.f_isfd = 1,
2108 	.f_attach = NULL,
2109 	.f_detach = filt_fsevent_detach,
2110 	.f_event = filt_fsevent,
2111 	.f_touch = filt_fsevent_touch,
2112 	.f_process = filt_fsevent_process,
2113 };
2114 
2115 static int
fseventsf_kqfilter(struct fileproc * fp,struct knote * kn,__unused struct kevent_qos_s * kev)2116 fseventsf_kqfilter(struct fileproc *fp, struct knote *kn,
2117     __unused struct kevent_qos_s *kev)
2118 {
2119 	fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2120 	int res;
2121 
2122 	kn->kn_hook = (void*)fseh;
2123 	kn->kn_filtid = EVFILTID_FSEVENT;
2124 
2125 	lock_watch_table();
2126 
2127 	KNOTE_ATTACH(&fseh->knotes, kn);
2128 
2129 	/* check to see if it is fired already */
2130 	res = filt_fsevent_common(kn, NULL, 0);
2131 
2132 	unlock_watch_table();
2133 
2134 	return res;
2135 }
2136 
2137 
2138 static int
fseventsf_drain(struct fileproc * fp,__unused vfs_context_t ctx)2139 fseventsf_drain(struct fileproc *fp, __unused vfs_context_t ctx)
2140 {
2141 	int counter = 0;
2142 	fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2143 
2144 	// if there are people still waiting, sleep for 10ms to
2145 	// let them clean up and get out of there.  however we
2146 	// also don't want to get stuck forever so if they don't
2147 	// exit after 5 seconds we're tearing things down anyway.
2148 	while (fseh->watcher->blockers && counter++ < 500) {
2149 		// issue wakeup in case anyone is blocked waiting for an event
2150 		// do this each time we wakeup in case the blocker missed
2151 		// the wakeup due to the unprotected test of WATCHER_CLOSING
2152 		// and decision to tsleep in fmod_watch... this bit of
2153 		// latency is a decent tradeoff against not having to
2154 		// take and drop a lock in fmod_watch
2155 		lock_watch_table();
2156 		fsevents_wakeup(fseh->watcher);
2157 		unlock_watch_table();
2158 
2159 		tsleep((caddr_t)fseh->watcher, PRIBIO, "watcher-close", 1);
2160 	}
2161 
2162 	return 0;
2163 }
2164 
2165 
2166 static int
fseventsopen(__unused dev_t dev,__unused int flag,__unused int mode,__unused struct proc * p)2167 fseventsopen(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2168 {
2169 	if (!kauth_cred_issuser(kauth_cred_get())) {
2170 		return EPERM;
2171 	}
2172 
2173 	return 0;
2174 }
2175 
2176 static int
fseventsclose(__unused dev_t dev,__unused int flag,__unused int mode,__unused struct proc * p)2177 fseventsclose(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2178 {
2179 	return 0;
2180 }
2181 
2182 static int
fseventsread(__unused dev_t dev,__unused struct uio * uio,__unused int ioflag)2183 fseventsread(__unused dev_t dev, __unused struct uio *uio, __unused int ioflag)
2184 {
2185 	return EIO;
2186 }
2187 
2188 
2189 static int
parse_buffer_and_add_events(const char * buffer,size_t bufsize,vfs_context_t ctx,size_t * remainder)2190 parse_buffer_and_add_events(const char *buffer, size_t bufsize, vfs_context_t ctx, size_t *remainder)
2191 {
2192 	const fse_info *finfo, *dest_finfo;
2193 	const char *path, *ptr, *dest_path, *event_start = buffer;
2194 	size_t path_len, dest_path_len;
2195 	int type, err = 0;
2196 
2197 
2198 	ptr = buffer;
2199 	while ((ptr + sizeof(int) + sizeof(fse_info) + 1) < buffer + bufsize) {
2200 		type = *(const int *)ptr;
2201 		if (type < 0 || type >= FSE_MAX_EVENTS) {
2202 			err = EINVAL;
2203 			break;
2204 		}
2205 
2206 		ptr += sizeof(int);
2207 
2208 		finfo = (const fse_info *)ptr;
2209 		ptr += sizeof(fse_info);
2210 
2211 		path = ptr;
2212 		while (ptr < buffer + bufsize && *ptr != '\0') {
2213 			ptr++;
2214 		}
2215 
2216 		if (ptr >= buffer + bufsize) {
2217 			break;
2218 		}
2219 
2220 		ptr++; // advance over the trailing '\0'
2221 
2222 		path_len = ptr - path;
2223 
2224 		if (type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CLONE) {
2225 			event_start = ptr; // record where the next event starts
2226 
2227 			err = add_fsevent(type, ctx, FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, FSE_ARG_DONE);
2228 			if (err) {
2229 				break;
2230 			}
2231 			continue;
2232 		}
2233 
2234 		//
2235 		// if we're here we have to slurp up the destination finfo
2236 		// and path so that we can pass them to the add_fsevent()
2237 		// call.  basically it's a copy of the above code.
2238 		//
2239 		dest_finfo = (const fse_info *)ptr;
2240 		ptr += sizeof(fse_info);
2241 
2242 		dest_path = ptr;
2243 		while (ptr < buffer + bufsize && *ptr != '\0') {
2244 			ptr++;
2245 		}
2246 
2247 		if (ptr >= buffer + bufsize) {
2248 			break;
2249 		}
2250 
2251 		ptr++;       // advance over the trailing '\0'
2252 		event_start = ptr; // record where the next event starts
2253 
2254 		dest_path_len = ptr - dest_path;
2255 		//
2256 		// If the destination inode number is non-zero, generate a rename
2257 		// with both source and destination FSE_ARG_FINFO. Otherwise generate
2258 		// a rename with only one FSE_ARG_FINFO. If you need to inject an
2259 		// exchange with an inode of zero, just make that inode (and its path)
2260 		// come in as the first one, not the second.
2261 		//
2262 		if (dest_finfo->ino) {
2263 			err = add_fsevent(type, ctx,
2264 			    FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2265 			    FSE_ARG_STRING, dest_path_len, dest_path, FSE_ARG_FINFO, dest_finfo,
2266 			    FSE_ARG_DONE);
2267 		} else {
2268 			err = add_fsevent(type, ctx,
2269 			    FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2270 			    FSE_ARG_STRING, dest_path_len, dest_path,
2271 			    FSE_ARG_DONE);
2272 		}
2273 
2274 		if (err) {
2275 			break;
2276 		}
2277 	}
2278 
2279 	// if the last event wasn't complete, set the remainder
2280 	// to be the last event start boundary.
2281 	//
2282 	*remainder = (long)((buffer + bufsize) - event_start);
2283 
2284 	return err;
2285 }
2286 
2287 
2288 //
2289 // Note: this buffer size can not ever be less than
2290 //       2*MAXPATHLEN + 2*sizeof(fse_info) + sizeof(int)
2291 //       because that is the max size for a single event.
2292 //       I made it 4k to be a "nice" size.  making it
2293 //       smaller is not a good idea.
2294 //
2295 #define WRITE_BUFFER_SIZE  4096
2296 char *write_buffer = NULL;
2297 
2298 static int
fseventswrite(__unused dev_t dev,struct uio * uio,__unused int ioflag)2299 fseventswrite(__unused dev_t dev, struct uio *uio, __unused int ioflag)
2300 {
2301 	int error = 0;
2302 	size_t count, offset = 0, remainder = 0;
2303 	vfs_context_t ctx = vfs_context_current();
2304 
2305 	lck_mtx_lock(&event_writer_lock);
2306 
2307 	if (write_buffer == NULL) {
2308 		if (kmem_alloc(kernel_map, (vm_offset_t *)&write_buffer, WRITE_BUFFER_SIZE, VM_KERN_MEMORY_FILE)) {
2309 			lck_mtx_unlock(&event_writer_lock);
2310 			return ENOMEM;
2311 		}
2312 	}
2313 
2314 	//
2315 	// this loop copies in and processes the events written.
2316 	// it takes care to copy in reasonable size chunks and
2317 	// process them.  if there is an event that spans a chunk
2318 	// boundary we're careful to copy those bytes down to the
2319 	// beginning of the buffer and read the next chunk in just
2320 	// after it.
2321 	//
2322 	while (uio_resid(uio)) {
2323 		count = MIN(WRITE_BUFFER_SIZE - offset, (size_t)uio_resid(uio));
2324 
2325 		error = uiomove(write_buffer + offset, (int)count, uio);
2326 		if (error) {
2327 			break;
2328 		}
2329 
2330 		error = parse_buffer_and_add_events(write_buffer, offset + count, ctx, &remainder);
2331 		if (error) {
2332 			break;
2333 		}
2334 
2335 		//
2336 		// if there's any remainder, copy it down to the beginning
2337 		// of the buffer so that it will get processed the next time
2338 		// through the loop.  note that the remainder always starts
2339 		// at an event boundary.
2340 		//
2341 		memmove(write_buffer, (write_buffer + count + offset) - remainder, remainder);
2342 		offset = remainder;
2343 	}
2344 
2345 	lck_mtx_unlock(&event_writer_lock);
2346 
2347 	return error;
2348 }
2349 
2350 
2351 static const struct fileops fsevents_fops = {
2352 	.fo_type     = DTYPE_FSEVENTS,
2353 	.fo_read     = fseventsf_read,
2354 	.fo_write    = fo_no_write,
2355 	.fo_ioctl    = fseventsf_ioctl,
2356 	.fo_select   = fseventsf_select,
2357 	.fo_close    = fseventsf_close,
2358 	.fo_kqfilter = fseventsf_kqfilter,
2359 	.fo_drain    = fseventsf_drain,
2360 };
2361 
2362 typedef struct fsevent_clone_args32 {
2363 	user32_addr_t       event_list;
2364 	int32_t             num_events;
2365 	int32_t             event_queue_depth;
2366 	user32_addr_t       fd;
2367 } fsevent_clone_args32;
2368 
2369 typedef struct fsevent_clone_args64 {
2370 	user64_addr_t       event_list;
2371 	int32_t             num_events;
2372 	int32_t             event_queue_depth;
2373 	user64_addr_t       fd;
2374 } fsevent_clone_args64;
2375 
/* Clone commands: same group ('s') and number (1), one per argument layout. */
#define FSEVENTS_CLONE_32       _IOW('s', 1, fsevent_clone_args32)
#define FSEVENTS_CLONE_64       _IOW('s', 1, fsevent_clone_args64)
2378 
2379 static int
fseventsioctl(__unused dev_t dev,u_long cmd,caddr_t data,__unused int flag,struct proc * p)2380 fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p)
2381 {
2382 	struct fileproc *f;
2383 	int fd, error;
2384 	fsevent_handle *fseh = NULL;
2385 	fsevent_clone_args64 *fse_clone_args, _fse_clone;
2386 	int8_t *event_list;
2387 	int is64bit = proc_is64bit(p);
2388 
2389 	switch (cmd) {
2390 	case FSEVENTS_CLONE_32: {
2391 		if (is64bit) {
2392 			return EINVAL;
2393 		}
2394 		fsevent_clone_args32 *args32 = (fsevent_clone_args32 *)data;
2395 
2396 		fse_clone_args = &_fse_clone;
2397 		memset(fse_clone_args, 0, sizeof(fsevent_clone_args64));
2398 
2399 		fse_clone_args->event_list        = CAST_USER_ADDR_T(args32->event_list);
2400 		fse_clone_args->num_events        = args32->num_events;
2401 		fse_clone_args->event_queue_depth = args32->event_queue_depth;
2402 		fse_clone_args->fd                = CAST_USER_ADDR_T(args32->fd);
2403 		goto handle_clone;
2404 	}
2405 
2406 	case FSEVENTS_CLONE_64:
2407 		if (!is64bit) {
2408 			return EINVAL;
2409 		}
2410 		fse_clone_args = (fsevent_clone_args64 *)data;
2411 
2412 handle_clone:
2413 		if (fse_clone_args->num_events <= 0 || fse_clone_args->num_events > 4096) {
2414 			return EINVAL;
2415 		}
2416 
2417 		fseh = kalloc_type(fsevent_handle, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2418 
2419 		klist_init(&fseh->knotes);
2420 
2421 		event_list = kalloc_data(fse_clone_args->num_events * sizeof(int8_t), Z_WAITOK);
2422 		if (event_list == NULL) {
2423 			kfree_type(fsevent_handle, fseh);
2424 			return ENOMEM;
2425 		}
2426 
2427 		error = copyin((user_addr_t)fse_clone_args->event_list,
2428 		    (void *)event_list,
2429 		    fse_clone_args->num_events * sizeof(int8_t));
2430 		if (error) {
2431 			kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2432 			kfree_type(fsevent_handle, fseh);
2433 			return error;
2434 		}
2435 
2436 		/*
2437 		 * Lock down the user's "fd" result buffer so it's safe
2438 		 * to hold locks while we copy it out.
2439 		 */
2440 		error = vslock((user_addr_t)fse_clone_args->fd,
2441 		    sizeof(int32_t));
2442 		if (error) {
2443 			kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2444 			kfree_type(fsevent_handle, fseh);
2445 			return error;
2446 		}
2447 
2448 		error = add_watcher(event_list,
2449 		    fse_clone_args->num_events,
2450 		    fse_clone_args->event_queue_depth,
2451 		    &fseh->watcher,
2452 		    fseh);
2453 		if (error) {
2454 			vsunlock((user_addr_t)fse_clone_args->fd,
2455 			    sizeof(int32_t), 0);
2456 			kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2457 			kfree_type(fsevent_handle, fseh);
2458 			return error;
2459 		}
2460 
2461 		fseh->watcher->fseh = fseh;
2462 
2463 		error = falloc(p, &f, &fd, vfs_context_current());
2464 		if (error) {
2465 			remove_watcher(fseh->watcher);
2466 			vsunlock((user_addr_t)fse_clone_args->fd,
2467 			    sizeof(int32_t), 0);
2468 			kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2469 			kfree_type(fsevent_handle, fseh);
2470 			return error;
2471 		}
2472 		proc_fdlock(p);
2473 		f->fp_glob->fg_flag = FREAD | FWRITE;
2474 		f->fp_glob->fg_ops = &fsevents_fops;
2475 		fp_set_data(f, fseh);
2476 
2477 		/*
2478 		 * We can safely hold the proc_fdlock across this copyout()
2479 		 * because of the vslock() call above.  The vslock() call
2480 		 * also ensures that we will never get an error, so assert
2481 		 * this.
2482 		 */
2483 		error = copyout((void *)&fd, (user_addr_t)fse_clone_args->fd, sizeof(int32_t));
2484 		assert(error == 0);
2485 
2486 		procfdtbl_releasefd(p, fd, NULL);
2487 		fp_drop(p, fd, f, 1);
2488 		proc_fdunlock(p);
2489 
2490 		vsunlock((user_addr_t)fse_clone_args->fd,
2491 		    sizeof(int32_t), 1);
2492 		break;
2493 
2494 	default:
2495 		error = EINVAL;
2496 		break;
2497 	}
2498 
2499 	return error;
2500 }
2501 
2502 static void
fsevents_wakeup(fs_event_watcher * watcher)2503 fsevents_wakeup(fs_event_watcher *watcher)
2504 {
2505 	selwakeup(&watcher->fseh->si);
2506 	KNOTE(&watcher->fseh->knotes, NOTE_WRITE | NOTE_NONE);
2507 	wakeup((caddr_t)watcher);
2508 }
2509 
2510 
2511 /*
2512  * A struct describing which functions will get invoked for certain
2513  * actions.
2514  */
2515 static const struct cdevsw fsevents_cdevsw =
2516 {
2517 	.d_open = fseventsopen,
2518 	.d_close = fseventsclose,
2519 	.d_read = fseventsread,
2520 	.d_write = fseventswrite,
2521 	.d_ioctl = fseventsioctl,
2522 	.d_stop = (stop_fcn_t *)&nulldev,
2523 	.d_reset = (reset_fcn_t *)&nulldev,
2524 	.d_select = eno_select,
2525 	.d_mmap = eno_mmap,
2526 	.d_strategy = eno_strat,
2527 	.d_reserved_1 = eno_getc,
2528 	.d_reserved_2 = eno_putc,
2529 };
2530 
2531 
2532 /*
2533  * Called to initialize our device,
2534  * and to register ourselves with devfs
2535  */
2536 
2537 void
fsevents_init(void)2538 fsevents_init(void)
2539 {
2540 	int ret;
2541 
2542 	if (fsevents_installed) {
2543 		return;
2544 	}
2545 
2546 	fsevents_installed = 1;
2547 
2548 	ret = cdevsw_add(-1, &fsevents_cdevsw);
2549 	if (ret < 0) {
2550 		fsevents_installed = 0;
2551 		return;
2552 	}
2553 
2554 	devfs_make_node(makedev(ret, 0), DEVFS_CHAR,
2555 	    UID_ROOT, GID_WHEEL, 0644, "fsevents", 0);
2556 
2557 	fsevents_internal_init();
2558 }
2559 
2560 
2561 char *
get_pathbuff(void)2562 get_pathbuff(void)
2563 {
2564 	return zalloc(ZV_NAMEI);
2565 }
2566 
2567 void
release_pathbuff(char * path)2568 release_pathbuff(char *path)
2569 {
2570 	if (path == NULL) {
2571 		return;
2572 	}
2573 	zfree(ZV_NAMEI, path);
2574 }
2575 
2576 int
get_fse_info(struct vnode * vp,fse_info * fse,__unused vfs_context_t ctx)2577 get_fse_info(struct vnode *vp, fse_info *fse, __unused vfs_context_t ctx)
2578 {
2579 	struct vnode_attr va;
2580 
2581 	VATTR_INIT(&va);
2582 	VATTR_WANTED(&va, va_fsid);
2583 	va.va_vaflags |= VA_REALFSID;
2584 	VATTR_WANTED(&va, va_fileid);
2585 	VATTR_WANTED(&va, va_mode);
2586 	VATTR_WANTED(&va, va_uid);
2587 	VATTR_WANTED(&va, va_gid);
2588 	if (vp->v_flag & VISHARDLINK) {
2589 		if (vp->v_type == VDIR) {
2590 			VATTR_WANTED(&va, va_dirlinkcount);
2591 		} else {
2592 			VATTR_WANTED(&va, va_nlink);
2593 		}
2594 	}
2595 
2596 	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
2597 		memset(fse, 0, sizeof(fse_info));
2598 		return -1;
2599 	}
2600 
2601 	return vnode_get_fse_info_from_vap(vp, fse, &va);
2602 }
2603 
2604 int
vnode_get_fse_info_from_vap(vnode_t vp,fse_info * fse,struct vnode_attr * vap)2605 vnode_get_fse_info_from_vap(vnode_t vp, fse_info *fse, struct vnode_attr *vap)
2606 {
2607 	fse->ino  = (ino64_t)vap->va_fileid;
2608 	fse->dev  = (dev_t)vap->va_fsid;
2609 	fse->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | vap->va_mode;
2610 	fse->uid  = (uid_t)vap->va_uid;
2611 	fse->gid  = (gid_t)vap->va_gid;
2612 	if (vp->v_flag & VISHARDLINK) {
2613 		fse->mode |= FSE_MODE_HLINK;
2614 		if (vp->v_type == VDIR) {
2615 			fse->nlink = (uint64_t)vap->va_dirlinkcount;
2616 		} else {
2617 			fse->nlink = (uint64_t)vap->va_nlink;
2618 		}
2619 	}
2620 
2621 	return 0;
2622 }
2623 
2624 void
create_fsevent_from_kevent(vnode_t vp,uint32_t kevents,struct vnode_attr * vap)2625 create_fsevent_from_kevent(vnode_t vp, uint32_t kevents, struct vnode_attr *vap)
2626 {
2627 	int fsevent_type = FSE_CONTENT_MODIFIED, len; // the default is the most pessimistic
2628 	char pathbuf[MAXPATHLEN];
2629 	fse_info fse;
2630 
2631 
2632 	if (kevents & VNODE_EVENT_DELETE) {
2633 		fsevent_type = FSE_DELETE;
2634 	} else if (kevents & (VNODE_EVENT_EXTEND | VNODE_EVENT_WRITE)) {
2635 		fsevent_type = FSE_CONTENT_MODIFIED;
2636 	} else if (kevents & VNODE_EVENT_LINK) {
2637 		fsevent_type = FSE_CREATE_FILE;
2638 	} else if (kevents & VNODE_EVENT_RENAME) {
2639 		fsevent_type = FSE_CREATE_FILE; // XXXdbg - should use FSE_RENAME but we don't have the destination info;
2640 	} else if (kevents & (VNODE_EVENT_FILE_CREATED | VNODE_EVENT_FILE_REMOVED | VNODE_EVENT_DIR_CREATED | VNODE_EVENT_DIR_REMOVED)) {
2641 		fsevent_type = FSE_STAT_CHANGED; // XXXdbg - because vp is a dir and the thing created/removed lived inside it
2642 	} else { // a catch all for VNODE_EVENT_PERMS, VNODE_EVENT_ATTRIB and anything else
2643 		fsevent_type = FSE_STAT_CHANGED;
2644 	}
2645 
2646 	// printf("convert_kevent: kevents 0x%x fsevent type 0x%x (for %s)\n", kevents, fsevent_type, vp->v_name ? vp->v_name : "(no-name)");
2647 
2648 	fse.dev = vap->va_fsid;
2649 	fse.ino = vap->va_fileid;
2650 	fse.mode = vnode_vttoif(vnode_vtype(vp)) | (uint32_t)vap->va_mode;
2651 	if (vp->v_flag & VISHARDLINK) {
2652 		fse.mode |= FSE_MODE_HLINK;
2653 		if (vp->v_type == VDIR) {
2654 			fse.nlink = vap->va_dirlinkcount;
2655 		} else {
2656 			fse.nlink = vap->va_nlink;
2657 		}
2658 	}
2659 
2660 	if (vp->v_type == VDIR) {
2661 		fse.mode |= FSE_REMOTE_DIR_EVENT;
2662 	}
2663 
2664 
2665 	fse.uid = vap->va_uid;
2666 	fse.gid = vap->va_gid;
2667 
2668 	len = sizeof(pathbuf);
2669 	if (vn_getpath_no_firmlink(vp, pathbuf, &len) == 0) {
2670 		add_fsevent(fsevent_type, vfs_context_current(), FSE_ARG_STRING, len, pathbuf, FSE_ARG_FINFO, &fse, FSE_ARG_DONE);
2671 	}
2672 	return;
2673 }
2674 
2675 #else /* CONFIG_FSE */
2676 
2677 #include <sys/fsevents.h>
2678 
2679 /*
2680  * The get_pathbuff and release_pathbuff routines are used in places not
2681  * related to fsevents, and it's a handy abstraction, so define trivial
2682  * versions that don't cache a pool of buffers.  This way, we don't have
2683  * to conditionalize the callers, and they still get the advantage of the
2684  * pool of buffers if CONFIG_FSE is turned on.
2685  */
2686 char *
get_pathbuff(void)2687 get_pathbuff(void)
2688 {
2689 	return zalloc(ZV_NAMEI);
2690 }
2691 
2692 void
release_pathbuff(char * path)2693 release_pathbuff(char *path)
2694 {
2695 	zfree(ZV_NAMEI, path);
2696 }
2697 
2698 int
add_fsevent(__unused int type,__unused vfs_context_t ctx,...)2699 add_fsevent(__unused int type, __unused vfs_context_t ctx, ...)
2700 {
2701 	return 0;
2702 }
2703 
2704 int
need_fsevent(__unused int type,__unused vnode_t vp)2705 need_fsevent(__unused int type, __unused vnode_t vp)
2706 {
2707 	return 0;
2708 }
2709 
2710 #endif /* CONFIG_FSE */
2711