1 /*
2 * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <stdarg.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/event.h> // for kqueue related stuff
32 #include <sys/fsevents.h>
33
34 #if CONFIG_FSE
35 #include <sys/namei.h>
36 #include <sys/filedesc.h>
37 #include <sys/kernel.h>
38 #include <sys/file_internal.h>
39 #include <sys/stat.h>
40 #include <sys/vnode_internal.h>
41 #include <sys/mount_internal.h>
42 #include <sys/proc_internal.h>
43 #include <sys/kauth.h>
44 #include <sys/uio.h>
45 #include <kern/kalloc.h>
46 #include <sys/dirent.h>
47 #include <sys/attr.h>
48 #include <sys/sysctl.h>
49 #include <sys/ubc.h>
50 #include <machine/cons.h>
51 #include <miscfs/specfs/specdev.h>
52 #include <miscfs/devfs/devfs.h>
53 #include <sys/filio.h>
54 #include <kern/locks.h>
55 #include <libkern/OSAtomic.h>
56 #include <kern/zalloc.h>
57 #include <mach/mach_time.h>
58 #include <kern/thread_call.h>
59 #include <kern/clock.h>
60 #include <IOKit/IOBSD.h>
61
62 #include <security/audit/audit.h>
63 #include <bsm/audit_kevents.h>
64
65 #include <pexpert/pexpert.h>
66 #include <libkern/section_keywords.h>
67
//
// In-kernel record of one file-system event. add_fsevent() allocates
// these from event_zone, links them on kfse_list_head and hands them to
// watchers by reference (see refcount).
//
// Which arm of the anonymous union is valid depends on `type`:
//   FSE_DOCID_CREATED / FSE_DOCID_CHANGED  -> docid_event
//   FSE_ACCESS_GRANTED                     -> access_granted_event
//   everything else                        -> regular_event
//
typedef struct kfs_event {
	LIST_ENTRY(kfs_event) kevent_list;
	uint64_t abstime; // when this event happened (mach_absolute_time())
	int16_t type; // type code of this event
	uint16_t flags; // per-event flags (KFSE_* bits)
	int32_t refcount; // number of clients referencing this
	pid_t pid; // filled in by kfse_init() from proc_getpid()
	int32_t spare;

	union {
		struct regular_event {
			// This must match the layout of fse_info
			// exactly, except for the "nlink" field is
			// not included here. See copy_out_kfse()
			// for all of the sordid details, and also
			// the _Static_assert() statements below.
			ino64_t ino;
			dev_t dev;
			int32_t mode;
			uid_t uid;
			uint32_t document_id;
			struct kfs_event *dest; // if this is a two-file op
			const char *str; // path string obtained from vfs_addname()
			uint16_t len; // length stored alongside str
		} regular_event;

		struct {
			ino64_t src_ino;
			ino64_t dst_ino;
			uint64_t docid;
			dev_t dev;
		} docid_event;

		struct {
			audit_token_t audit_token;
			const char *str; // path string obtained from vfs_addname()
			uint16_t len;
		} access_granted_event;
	};
} kfs_event;
108
// Compile-time proof that the leading fields of regular_event sit at
// the same offsets as the corresponding fields of fse_info.
_Static_assert(offsetof(struct regular_event, ino) == offsetof(fse_info, ino),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, dev) == offsetof(fse_info, dev),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, mode) == offsetof(fse_info, mode),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, uid) == offsetof(fse_info, uid),
    "kfs_event and fse_info out-of-sync");
_Static_assert(offsetof(struct regular_event, document_id) == offsetof(fse_info, document_id),
    "kfs_event and fse_info out-of-sync");

// number of leading bytes of fse_info that precede its nlink field
#define KFSE_INFO_COPYSIZE offsetof(fse_info, nlink)

// flags for the flags field
#define KFSE_COMBINED_EVENTS 0x0001
#define KFSE_CONTAINS_DROPPED_EVENTS 0x0002
#define KFSE_ON_LIST 0x0004 // set once the event is linked on kfse_list_head
#define KFSE_BEING_CREATED 0x0008 // set by kfse_init(), cleared when add_fsevent() has filled the event in

// every live event is linked here by add_fsevent()
LIST_HEAD(kfse_list, kfs_event) kfse_list_head = LIST_HEAD_INITIALIZER(x);
int num_events_outstanding = 0; // bumped by add_fsevent() for each event it queues
int num_pending_rename = 0; // bumped by add_fsevent() for each FSE_RENAME it queues
131
132
struct fsevent_handle;

//
// Per-client watcher state. Registered watchers are stored in
// watcher_table[] and consulted by add_fsevent() for every new event.
//
typedef struct fs_event_watcher {
	int8_t *event_list; // the events we're interested in
	int32_t num_events; // add_fsevent() only indexes event_list[] with types below this
	dev_t *devices_not_to_watch;// report events from devices not in this list
	uint32_t num_devices; // number of entries in devices_not_to_watch[]
	int32_t flags; // WATCHER_* bits
	kfs_event **event_queue;
	int32_t eventq_size; // number of event pointers in queue
	int32_t num_readers;
	int32_t rd; // read index into the event_queue
	int32_t wr; // write index into the event_queue
	int32_t blockers;
	int32_t my_id;
	uint32_t num_dropped; // bumped when watcher_add_event() fails for this watcher
	uint64_t max_event_id;
	struct fsevent_handle *fseh;
	pid_t pid;
	char proc_name[(2 * MAXCOMLEN) + 1];
} fs_event_watcher;
154
// fs_event_watcher flags
#define WATCHER_DROPPED_EVENTS 0x0001
#define WATCHER_CLOSING 0x0002
#define WATCHER_WANTS_COMPACT_EVENTS 0x0004
#define WATCHER_WANTS_EXTENDED_INFO 0x0008
#define WATCHER_APPLE_SYSTEM_SERVICE 0x0010 // fseventsd, coreservicesd, mds, revisiond

// number of slots in watcher_table[]; unused slots are NULL
#define MAX_WATCHERS 8
static fs_event_watcher *watcher_table[MAX_WATCHERS];

// default event pool size; fsevents_internal_init() lets
// PE_get_default("kern.maxkfsevents") override it
#define DEFAULT_MAX_KFS_EVENTS 4096
static int max_kfs_events = DEFAULT_MAX_KFS_EVENTS;

// we allocate kfs_event structures out of this zone
static zone_t event_zone;
// non-zero once fsevents_internal_init() has been entered
static int fs_event_init = 0;

//
// this array records whether anyone is interested in a
// particular type of event. if no one is, we bail out
// early from the event delivery
//
static int16_t fs_event_type_watchers[FSE_MAX_EVENTS];

// the device currently being unmounted:
static dev_t fsevent_unmount_dev = 0;
// how many ACKs are still outstanding:
static int fsevent_unmount_ack_count = 0;

static int watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse);
static void fsevents_wakeup(fs_event_watcher *watcher);

//
// Locks
//
static LCK_ATTR_DECLARE(fsevent_lock_attr, 0, 0);
static LCK_GRP_DECLARE(fsevent_mutex_group, "fsevent-mutex");
static LCK_GRP_DECLARE(fsevent_rw_group, "fsevent-rw");

static LCK_RW_DECLARE_ATTR(event_handling_lock, // handles locking for event manipulation and recycling
    &fsevent_rw_group, &fsevent_lock_attr);
// taken by lock_watch_table() / unlock_watch_table()
static LCK_MTX_DECLARE_ATTR(watch_table_lock,
    &fsevent_mutex_group, &fsevent_lock_attr);
// taken by lock_fs_event_list() / unlock_fs_event_list()
static LCK_MTX_DECLARE_ATTR(event_buf_lock,
    &fsevent_mutex_group, &fsevent_lock_attr);
static LCK_MTX_DECLARE_ATTR(event_writer_lock,
    &fsevent_mutex_group, &fsevent_lock_attr);


/* Explicitly declare qsort so compiler doesn't complain */
__private_extern__ void qsort(
	void * array,
	size_t nmembers,
	size_t member_size,
	int (*)(const void *, const void *));
210
211 static int
is_ignored_directory(const char * path)212 is_ignored_directory(const char *path)
213 {
214 if (!path) {
215 return 0;
216 }
217
218 #define IS_TLD(x) strnstr(__DECONST(char *, path), x, MAXPATHLEN)
219 if (IS_TLD("/.Spotlight-V100/") ||
220 IS_TLD("/.MobileBackups/") ||
221 IS_TLD("/Backups.backupdb/")) {
222 return 1;
223 }
224 #undef IS_TLD
225
226 return 0;
227 }
228
229 static void
fsevents_internal_init(void)230 fsevents_internal_init(void)
231 {
232 int i;
233
234 if (fs_event_init++ != 0) {
235 return;
236 }
237
238 for (i = 0; i < FSE_MAX_EVENTS; i++) {
239 fs_event_type_watchers[i] = 0;
240 }
241
242 memset(watcher_table, 0, sizeof(watcher_table));
243
244 PE_get_default("kern.maxkfsevents", &max_kfs_events, sizeof(max_kfs_events));
245
246 event_zone = zone_create_ext("fs-event-buf", sizeof(kfs_event),
247 ZC_NOGC | ZC_NOCALLOUT, ZONE_ID_ANY, ^(zone_t z) {
248 // mark the zone as exhaustible so that it will not
249 // ever grow beyond what we initially filled it with
250 zone_set_exhaustible(z, max_kfs_events, /* exhausts */ true);
251 });
252
253 zone_fill_initially(event_zone, max_kfs_events);
254 }
255
256 static void
lock_watch_table(void)257 lock_watch_table(void)
258 {
259 lck_mtx_lock(&watch_table_lock);
260 }
261
262 static void
unlock_watch_table(void)263 unlock_watch_table(void)
264 {
265 lck_mtx_unlock(&watch_table_lock);
266 }
267
268 static void
lock_fs_event_list(void)269 lock_fs_event_list(void)
270 {
271 lck_mtx_lock(&event_buf_lock);
272 }
273
274 static void
unlock_fs_event_list(void)275 unlock_fs_event_list(void)
276 {
277 lck_mtx_unlock(&event_buf_lock);
278 }
279
280 // forward prototype
281 static void release_event_ref(kfs_event *kfse);
282
283 static boolean_t
watcher_cares_about_dev(fs_event_watcher * watcher,dev_t dev)284 watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev)
285 {
286 unsigned int i;
287
288 // if devices_not_to_watch is NULL then we care about all
289 // events from all devices
290 if (watcher->devices_not_to_watch == NULL) {
291 return true;
292 }
293
294 for (i = 0; i < watcher->num_devices; i++) {
295 if (dev == watcher->devices_not_to_watch[i]) {
296 // found a match! that means we do not
297 // want events from this device.
298 return false;
299 }
300 }
301
302 // if we're here it's not in the devices_not_to_watch[]
303 // list so that means we do care about it
304 return true;
305 }
306
307
308 int
need_fsevent(int type,vnode_t vp)309 need_fsevent(int type, vnode_t vp)
310 {
311 if (type >= 0 && type < FSE_MAX_EVENTS && fs_event_type_watchers[type] == 0) {
312 return 0;
313 }
314
315 // events in /dev aren't really interesting...
316 if (vp->v_tag == VT_DEVFS) {
317 return 0;
318 }
319
320 return 1;
321 }
322
323
// true for the two event types FSE_STAT_CHANGED and FSE_CONTENT_MODIFIED
#define is_throw_away(x) ((x) == FSE_STAT_CHANGED || (x) == FSE_CONTENT_MODIFIED)


// number of events add_fsevent() had to drop because no kfs_event
// could be allocated
int num_dropped = 0;

// uptime at which add_fsevent() last printed its "queue is full" message
static struct timeval last_print;

//
// These variables are used to track coalescing multiple identical
// events for the same vnode/pathname. If we get the same event
// type and same vnode/pathname as the previous event, we just drop
// the event since it's superfluous. This improves some micro-
// benchmarks considerably and actually has a real-world impact on
// tests like a Finder copy where multiple stat-changed events can
// get coalesced.
//
static int last_event_type = -1;
static void *last_ptr = NULL;
static char last_str[MAXPATHLEN];
static int last_nlen = 0;
static int last_vid = -1;
static uint64_t last_coalesced_time = 0;
static void *last_event_ptr = NULL;
static pid_t last_pid = -1;
int last_coalesced = 0;
// filled in lazily by add_fsevent() via clock_timebase_info()
static mach_timebase_info_data_t sTimebaseInfo = { 0, 0 };

// upper bound applied to the link count when add_fsevent() replicates
// an event for sibling hardlinks
#define MAX_HARDLINK_NOTIFICATIONS 128
352
353 static inline void
kfse_init(kfs_event * kfse,int type,uint64_t time,proc_t p)354 kfse_init(kfs_event *kfse, int type, uint64_t time, proc_t p)
355 {
356 memset(kfse, 0, sizeof(*kfse));
357 kfse->refcount = 1;
358 kfse->type = (int16_t)type;
359 kfse->abstime = time;
360 kfse->pid = proc_getpid(p);
361
362 OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse->flags);
363 }
364
365 int
add_fsevent(int type,vfs_context_t ctx,...)366 add_fsevent(int type, vfs_context_t ctx, ...)
367 {
368 struct proc *p = vfs_context_proc(ctx);
369 int i, arg_type, ret;
370 kfs_event *kfse, *kfse_dest = NULL, *cur;
371 fs_event_watcher *watcher;
372 va_list ap;
373 int error = 0, did_alloc = 0;
374 int64_t orig_linkcount = -1;
375 dev_t dev = 0;
376 uint64_t now, elapsed;
377 uint64_t orig_linkid = 0, next_linkid = 0;
378 uint64_t link_parentid = 0;
379 char *pathbuff = NULL, *path_override = NULL;
380 char *link_name = NULL;
381 vnode_t link_vp = NULL;
382 int pathbuff_len = 0;
383 uthread_t ut = get_bsdthread_info(current_thread());
384 bool do_all_links = true;
385 bool do_cache_reset = false;
386
387 if (type == FSE_CONTENT_MODIFIED_NO_HLINK) {
388 do_all_links = false;
389 type = FSE_CONTENT_MODIFIED;
390 }
391
392
393 restart:
394 va_start(ap, ctx);
395
396 // ignore bogus event types..
397 if (type < 0 || type >= FSE_MAX_EVENTS) {
398 return EINVAL;
399 }
400
401 // if no one cares about this type of event, bail out
402 if (fs_event_type_watchers[type] == 0) {
403 va_end(ap);
404
405 return 0;
406 }
407
408 now = mach_absolute_time();
409
410 // find a free event and snag it for our use
411 // NOTE: do not do anything that would block until
412 // the lock is dropped.
413 lock_fs_event_list();
414
415 //
416 // check if this event is identical to the previous one...
417 // (as long as it's not an event type that can never be the
418 // same as a previous event)
419 //
420 if (path_override == NULL &&
421 type != FSE_CREATE_FILE &&
422 type != FSE_DELETE &&
423 type != FSE_RENAME &&
424 type != FSE_EXCHANGE &&
425 type != FSE_CHOWN &&
426 type != FSE_DOCID_CHANGED &&
427 type != FSE_DOCID_CREATED &&
428 type != FSE_CLONE &&
429 // don't coalesce FSE_ACCESS_GRANTED because it could
430 // have been granted to a different process.
431 type != FSE_ACCESS_GRANTED) {
432 void *ptr = NULL;
433 int vid = 0, was_str = 0, nlen = 0;
434
435 for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
436 switch (arg_type) {
437 case FSE_ARG_VNODE: {
438 ptr = va_arg(ap, void *);
439 vid = vnode_vid((struct vnode *)ptr);
440 last_str[0] = '\0';
441 break;
442 }
443 case FSE_ARG_STRING: {
444 nlen = va_arg(ap, int32_t);
445 ptr = va_arg(ap, void *);
446 was_str = 1;
447 break;
448 }
449 }
450 if (ptr != NULL) {
451 break;
452 }
453 }
454
455 if (sTimebaseInfo.denom == 0) {
456 (void) clock_timebase_info(&sTimebaseInfo);
457 }
458
459 elapsed = (now - last_coalesced_time);
460 if (sTimebaseInfo.denom != sTimebaseInfo.numer) {
461 if (sTimebaseInfo.denom == 1) {
462 elapsed *= sTimebaseInfo.numer;
463 } else {
464 // this could overflow... the worst that will happen is that we'll
465 // send (or not send) an extra event so I'm not going to worry about
466 // doing the math right like dtrace_abs_to_nano() does.
467 elapsed = (elapsed * sTimebaseInfo.numer) / (uint64_t)sTimebaseInfo.denom;
468 }
469 }
470
471 if (type == last_event_type
472 && (elapsed < 1000000000)
473 && (last_pid == proc_getpid(p))
474 &&
475 ((vid && vid == last_vid && last_ptr == ptr)
476 ||
477 (last_str[0] && last_nlen == nlen && ptr && strcmp(last_str, ptr) == 0))
478 ) {
479 last_coalesced++;
480 unlock_fs_event_list();
481 va_end(ap);
482
483 return 0;
484 } else {
485 last_ptr = ptr;
486 if (ptr && was_str) {
487 strlcpy(last_str, ptr, sizeof(last_str));
488 }
489 last_nlen = nlen;
490 last_vid = vid;
491 last_event_type = type;
492 last_coalesced_time = now;
493 last_pid = proc_getpid(p);
494 }
495 }
496 va_start(ap, ctx);
497
498
499 kfse = zalloc_noblock(event_zone);
500 if (kfse && (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE)) {
501 kfse_dest = zalloc_noblock(event_zone);
502 if (kfse_dest == NULL) {
503 did_alloc = 1;
504 zfree(event_zone, kfse);
505 kfse = NULL;
506 }
507 }
508
509
510 if (kfse == NULL) { // yikes! no free events
511 unlock_fs_event_list();
512 lock_watch_table();
513
514 for (i = 0; i < MAX_WATCHERS; i++) {
515 watcher = watcher_table[i];
516 if (watcher == NULL) {
517 continue;
518 }
519
520 watcher->flags |= WATCHER_DROPPED_EVENTS;
521 fsevents_wakeup(watcher);
522 }
523 unlock_watch_table();
524
525 {
526 struct timeval current_tv;
527
528 num_dropped++;
529
530 // only print a message at most once every 5 seconds
531 microuptime(¤t_tv);
532 if ((current_tv.tv_sec - last_print.tv_sec) > 10) {
533 int ii;
534 void *junkptr = zalloc_noblock(event_zone), *listhead = kfse_list_head.lh_first;
535
536 printf("add_fsevent: event queue is full! dropping events (num dropped events: %d; num events outstanding: %d).\n", num_dropped, num_events_outstanding);
537 printf("add_fsevent: kfse_list head %p ; num_pending_rename %d\n", listhead, num_pending_rename);
538 printf("add_fsevent: zalloc sez: %p\n", junkptr);
539 printf("add_fsevent: event_zone info: %d 0x%x\n", ((int *)event_zone)[0], ((int *)event_zone)[1]);
540 lock_watch_table();
541 for (ii = 0; ii < MAX_WATCHERS; ii++) {
542 if (watcher_table[ii] == NULL) {
543 continue;
544 }
545
546 printf("add_fsevent: watcher %s %p: rd %4d wr %4d q_size %4d flags 0x%x\n",
547 watcher_table[ii]->proc_name,
548 watcher_table[ii],
549 watcher_table[ii]->rd, watcher_table[ii]->wr,
550 watcher_table[ii]->eventq_size, watcher_table[ii]->flags);
551 }
552 unlock_watch_table();
553
554 last_print = current_tv;
555 if (junkptr) {
556 zfree(event_zone, junkptr);
557 }
558 }
559 }
560
561 if (pathbuff) {
562 release_pathbuff(pathbuff);
563 pathbuff = NULL;
564 }
565 return ENOSPC;
566 }
567
568 kfse_init(kfse, type, now, p);
569 last_event_ptr = kfse;
570 if (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE) {
571 kfse_init(kfse_dest, type, now, p);
572 kfse->regular_event.dest = kfse_dest;
573 }
574
575 num_events_outstanding++;
576 if (kfse->type == FSE_RENAME) {
577 num_pending_rename++;
578 }
579 LIST_INSERT_HEAD(&kfse_list_head, kfse, kevent_list);
580 OSBitOrAtomic16(KFSE_ON_LIST, &kfse->flags);
581
582 if (kfse->refcount < 1) {
583 panic("add_fsevent: line %d: kfse recount %d but should be at least 1", __LINE__, kfse->refcount);
584 }
585
586 unlock_fs_event_list(); // at this point it's safe to unlock
587
588 //
589 // now process the arguments passed in and copy them into
590 // the kfse
591 //
592
593 cur = kfse;
594
595 if (type == FSE_DOCID_CREATED || type == FSE_DOCID_CHANGED) {
596 //
597 // These events are special and not like the other events.
598 // They only have a dev_t, src inode #, dest inode #, and
599 // a doc-id (va_arg'd to us in that order). If we don't
600 // get one of them, then the error-check filler will
601 // catch it.
602 //
603 do_all_links = false;
604 arg_type = va_arg(ap, int32_t);
605 if (arg_type == FSE_ARG_DEV) {
606 cur->docid_event.dev = (dev_t)(va_arg(ap, dev_t));
607 }
608
609 arg_type = va_arg(ap, int32_t);
610 if (arg_type == FSE_ARG_INO) {
611 cur->docid_event.src_ino =
612 (ino64_t)(va_arg(ap, ino64_t));
613 }
614
615 arg_type = va_arg(ap, int32_t);
616 if (arg_type == FSE_ARG_INO) {
617 cur->docid_event.dst_ino =
618 (ino64_t)(va_arg(ap, ino64_t));
619 }
620
621 arg_type = va_arg(ap, int32_t);
622 if (arg_type == FSE_ARG_INT32) {
623 cur->docid_event.docid =
624 (uint64_t)va_arg(ap, uint32_t);
625 } else if (arg_type == FSE_ARG_INT64) {
626 cur->docid_event.docid =
627 (uint64_t)va_arg(ap, uint64_t);
628 }
629
630 goto done_with_args;
631 }
632
633 if (type == FSE_ACCESS_GRANTED) {
634 //
635 // This one is also different. We get a path string
636 // and (maybe) and audit token. If we don't get the
637 // audit token, we extract is from the vfs_context_t.
638 //
639 audit_token_t *atokenp = NULL;
640 vnode_t vp = NULL;
641 char *path_str = NULL;
642 size_t path_strlen = 0;
643 void *arg;
644 int32_t len32;
645
646 do_all_links = false;
647
648 while ((arg_type = va_arg(ap, int32_t)) != FSE_ARG_DONE) {
649 switch (arg_type) {
650 case FSE_ARG_STRING:
651 len32 = va_arg(ap, int32_t);
652 arg = va_arg(ap, char *);
653 if (path_str == NULL) {
654 path_str = arg;
655 path_strlen = len32;
656 }
657 break;
658
659 case FSE_ARG_PATH:
660 arg = va_arg(ap, char *);
661 if (path_str == NULL) {
662 path_str = arg;
663 }
664 break;
665
666 case FSE_ARG_VNODE:
667 arg = va_arg(ap, vnode_t);
668 if (vp == NULL) {
669 vp = arg;
670 }
671 break;
672
673 case FSE_ARG_AUDIT_TOKEN:
674 arg = va_arg(ap, audit_token_t *);
675 if (atokenp == NULL) {
676 atokenp = arg;
677 }
678 break;
679
680 default:
681 printf("add_fsevent: FSE_ACCESS_GRANTED unknown type %d\n", arg_type);
682 // just skip one 32-bit word and hope we
683 // sync up...
684 (void)va_arg(ap, int32_t);
685 }
686 }
687
688 if (atokenp != NULL) {
689 memcpy(&cur->access_granted_event.audit_token,
690 atokenp,
691 sizeof(cur->access_granted_event.audit_token));
692 } else if (vfs_context_copy_audit_token(ctx,
693 &cur->access_granted_event.audit_token) != 0) {
694 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
695 &cur->flags);
696 goto done_with_args;
697 }
698
699 //
700 // If we got FSE_ARG_STRING, the length includes the
701 // terminating NUL. If we got FSE_ARG_PATH, all we
702 // got was the string pointer, so get the length and
703 // adjust. If we didn't get either, then the caller
704 // needs to have provided us with a vnode, and with
705 // that we can get the path.
706 //
707 if (path_str != NULL) {
708 if (path_strlen == 0) {
709 path_strlen = strlen(path_str) + 1;
710 }
711 } else if (vp != NULL) {
712 pathbuff = get_pathbuff();
713 pathbuff_len = MAXPATHLEN;
714 pathbuff[0] = '\0';
715 if (vn_getpath_no_firmlink(vp, pathbuff,
716 &pathbuff_len) == 0) {
717 path_str = pathbuff;
718 path_strlen = pathbuff_len;
719 }
720 }
721
722 if (path_str != NULL) {
723 assert(path_strlen <= INT16_MAX);
724 cur->access_granted_event.str =
725 vfs_addname(path_str, (uint32_t)path_strlen, 0, 0);
726 if (path_str == pathbuff) {
727 release_pathbuff(pathbuff);
728 pathbuff = NULL;
729 }
730 }
731 if (cur->access_granted_event.str == NULL) {
732 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
733 &cur->flags);
734 }
735
736 goto done_with_args;
737 }
738
739 if (type == FSE_UNMOUNT_PENDING) {
740 // Just a dev_t
741 // We use the same fields as the regular event, but we
742 // don't have all of the data.
743 do_all_links = false;
744
745 arg_type = va_arg(ap, int32_t);
746 if (arg_type == FSE_ARG_DEV) {
747 cur->regular_event.dev = (dev_t)(va_arg(ap, dev_t));
748 }
749
750 cur->regular_event.dest = NULL;
751 cur->regular_event.str = NULL;
752 cur->regular_event.len = 0;
753
754 goto done_with_args;
755 }
756
757 for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
758 switch (arg_type) {
759 case FSE_ARG_VNODE: {
760 // this expands out into multiple arguments to the client
761 struct vnode *vp;
762 struct vnode_attr va;
763
764 if (kfse->regular_event.str != NULL) {
765 cur = kfse_dest;
766 }
767
768 vp = va_arg(ap, struct vnode *);
769 if (vp == NULL) {
770 panic("add_fsevent: you can't pass me a NULL vnode ptr (type %d)!",
771 cur->type);
772 }
773
774 VATTR_INIT(&va);
775 VATTR_WANTED(&va, va_fsid);
776 VATTR_WANTED(&va, va_fileid);
777 VATTR_WANTED(&va, va_mode);
778 VATTR_WANTED(&va, va_uid);
779 VATTR_WANTED(&va, va_document_id);
780 VATTR_WANTED(&va, va_nlink);
781 if ((ret = vnode_getattr(vp, &va, vfs_context_kernel())) != 0) {
782 // printf("add_fsevent: failed to getattr on vp %p (%d)\n", cur->fref.vp, ret);
783 cur->regular_event.str = NULL;
784 error = EINVAL;
785 goto clean_up;
786 }
787
788 cur->regular_event.dev = dev = (dev_t)va.va_fsid;
789 cur->regular_event.ino = (ino64_t)va.va_fileid;
790 cur->regular_event.mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | va.va_mode;
791 cur->regular_event.uid = va.va_uid;
792 cur->regular_event.document_id = va.va_document_id;
793 if (vp->v_flag & VISHARDLINK) {
794 cur->regular_event.mode |= FSE_MODE_HLINK;
795 if ((vp->v_type == VDIR && va.va_dirlinkcount == 0) || (vp->v_type == VREG && va.va_nlink == 0)) {
796 cur->regular_event.mode |= FSE_MODE_LAST_HLINK;
797 }
798 if (orig_linkid == 0) {
799 orig_linkid = cur->regular_event.ino;
800 orig_linkcount = MIN(va.va_nlink, MAX_HARDLINK_NOTIFICATIONS);
801 link_vp = vp;
802 if (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID && !link_name) {
803 VATTR_INIT(&va);
804 VATTR_WANTED(&va, va_parentid);
805 VATTR_WANTED(&va, va_name);
806 link_name = zalloc(ZV_NAMEI);
807 va.va_name = link_name;
808 if ((ret = vnode_getattr(vp, &va, vfs_context_kernel()) != 0) ||
809 !(VATTR_IS_SUPPORTED(&va, va_name)) ||
810 !(VATTR_IS_SUPPORTED(&va, va_parentid))) {
811 zfree(ZV_NAMEI, link_name);
812 link_name = NULL;
813 }
814 if (link_name) {
815 link_parentid = va.va_parentid;
816 }
817 va.va_name = NULL;
818 }
819 }
820 }
821
822 // if we haven't gotten the path yet, get it.
823 if (pathbuff == NULL && path_override == NULL) {
824 pathbuff = get_pathbuff();
825 pathbuff_len = MAXPATHLEN;
826
827 pathbuff[0] = '\0';
828 if ((ret = vn_getpath_no_firmlink(vp, pathbuff, &pathbuff_len)) != 0 || pathbuff[0] == '\0') {
829 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
830 &cur->flags);
831
832 do {
833 if (vp->v_parent != NULL) {
834 vp = vp->v_parent;
835 } else if (vp->v_mount) {
836 strlcpy(pathbuff, vp->v_mount->mnt_vfsstat.f_mntonname, MAXPATHLEN);
837 break;
838 } else {
839 vp = NULL;
840 }
841
842 if (vp == NULL) {
843 break;
844 }
845
846 pathbuff_len = MAXPATHLEN;
847 ret = vn_getpath_no_firmlink(vp, pathbuff, &pathbuff_len);
848 } while (ret == ENOSPC);
849
850 if (ret != 0 || vp == NULL) {
851 error = ENOENT;
852 goto clean_up;
853 }
854 }
855 } else if (path_override) {
856 pathbuff = path_override;
857 pathbuff_len = (int)strlen(path_override) + 1;
858 } else {
859 strlcpy(pathbuff, "NOPATH", MAXPATHLEN);
860 pathbuff_len = (int)strlen(pathbuff) + 1;
861 }
862
863 // store the path by adding it to the global string table
864 cur->regular_event.len = (u_int16_t)pathbuff_len;
865 cur->regular_event.str =
866 vfs_addname(pathbuff, pathbuff_len, 0, 0);
867 if (cur->regular_event.str == NULL ||
868 cur->regular_event.str[0] == '\0') {
869 panic("add_fsevent: was not able to add path %s to event %p.", pathbuff, cur);
870 }
871
872 if (pathbuff != path_override) {
873 release_pathbuff(pathbuff);
874 }
875 pathbuff = NULL;
876
877 break;
878 }
879
880 case FSE_ARG_FINFO: {
881 fse_info *fse;
882
883 fse = va_arg(ap, fse_info *);
884
885 cur->regular_event.dev = dev = (dev_t)fse->dev;
886 cur->regular_event.ino = (ino64_t)fse->ino;
887 cur->regular_event.mode = (int32_t)fse->mode;
888 cur->regular_event.uid = (uid_t)fse->uid;
889 cur->regular_event.document_id = (uint32_t)fse->document_id;
890 // if it's a hard-link and this is the last link, flag it
891 if (fse->mode & FSE_MODE_HLINK) {
892 if (fse->nlink == 0) {
893 cur->regular_event.mode |= FSE_MODE_LAST_HLINK;
894 }
895 if (orig_linkid == 0) {
896 orig_linkid = cur->regular_event.ino;
897 orig_linkcount = MIN(fse->nlink, MAX_HARDLINK_NOTIFICATIONS);
898 }
899 }
900 if (cur->regular_event.mode & FSE_TRUNCATED_PATH) {
901 OSBitOrAtomic16(KFSE_CONTAINS_DROPPED_EVENTS,
902 &cur->flags);
903 cur->regular_event.mode &= ~FSE_TRUNCATED_PATH;
904 }
905 break;
906 }
907
908 case FSE_ARG_STRING:
909 if (kfse->regular_event.str != NULL) {
910 cur = kfse_dest;
911 }
912
913 cur->regular_event.len =
914 (int16_t)(va_arg(ap, int32_t) & 0x7fff);
915 if (cur->regular_event.len >= 1) {
916 cur->regular_event.str =
917 vfs_addname(va_arg(ap, char *),
918 cur->regular_event.len, 0, 0);
919 } else {
920 printf("add_fsevent: funny looking string length: %d\n", (int)cur->regular_event.len);
921 cur->regular_event.len = 2;
922 cur->regular_event.str = vfs_addname("/",
923 cur->regular_event.len, 0, 0);
924 }
925 if (cur->regular_event.str[0] == 0) {
926 printf("add_fsevent: bogus looking string (len %d)\n", cur->regular_event.len);
927 }
928 break;
929
930 case FSE_ARG_INT32: {
931 uint32_t ival = (uint32_t)va_arg(ap, int32_t);
932 kfse->regular_event.uid = ival;
933 break;
934 }
935
936 default:
937 printf("add_fsevent: unknown type %d\n", arg_type);
938 // just skip one 32-bit word and hope we sync up...
939 (void)va_arg(ap, int32_t);
940 }
941 }
942
943 done_with_args:
944 va_end(ap);
945
946 // XXX Memory barrier here?
947 if (kfse_dest) {
948 OSBitAndAtomic16(~KFSE_BEING_CREATED, &kfse_dest->flags);
949 }
950 OSBitAndAtomic16(~KFSE_BEING_CREATED, &kfse->flags);
951
952 //
953 // now we have to go and let everyone know that
954 // is interested in this type of event
955 //
956 lock_watch_table();
957
958 for (i = 0; i < MAX_WATCHERS; i++) {
959 watcher = watcher_table[i];
960 if (watcher == NULL) {
961 continue;
962 }
963
964 if (type < watcher->num_events
965 && watcher->event_list[type] == FSE_REPORT
966 && watcher_cares_about_dev(watcher, dev)) {
967 if (watcher_add_event(watcher, kfse) != 0) {
968 watcher->num_dropped++;
969 continue;
970 }
971 }
972
973 // if (kfse->refcount < 1) {
974 // panic("add_fsevent: line %d: kfse recount %d but should be at least 1", __LINE__, kfse->refcount);
975 // }
976 }
977
978 unlock_watch_table();
979
980 clean_up:
981
982 if (pathbuff) {
983 release_pathbuff(pathbuff);
984 pathbuff = NULL;
985 }
986 // replicate events for sibling hardlinks
987 if (do_all_links &&
988 (kfse->regular_event.mode & FSE_MODE_HLINK) &&
989 !(kfse->regular_event.mode & FSE_MODE_LAST_HLINK) &&
990 (type == FSE_STAT_CHANGED ||
991 type == FSE_CONTENT_MODIFIED ||
992 type == FSE_FINDER_INFO_CHANGED ||
993 type == FSE_XATTR_MODIFIED)) {
994 if (orig_linkcount > 0 && orig_linkid != 0) {
995 #ifndef APFSIOC_NEXT_LINK
996 #define APFSIOC_NEXT_LINK _IOWR('J', 10, uint64_t)
997 #endif
998 if (path_override == NULL) {
999 path_override = get_pathbuff();
1000 }
1001 if (next_linkid == 0) {
1002 next_linkid = orig_linkid;
1003 }
1004
1005 if (link_vp) {
1006 mount_t mp = NULL;
1007 vnode_t mnt_rootvp = NULL;
1008 int iret = -1;
1009
1010 mp = vnode_mount(link_vp);
1011 if (mp) {
1012 iret = VFS_ROOT(mp, &mnt_rootvp, vfs_context_kernel());
1013 }
1014
1015 if (iret == 0 && mnt_rootvp) {
1016 iret = VNOP_IOCTL(mnt_rootvp, APFSIOC_NEXT_LINK, (char *)&next_linkid, (int)0, vfs_context_kernel());
1017 vnode_put(mnt_rootvp);
1018 }
1019
1020 int32_t fsid0;
1021 int path_override_len = MAXPATHLEN;
1022
1023 // continue resolving hardlink paths if there is a valid next_linkid retrieved
1024 // file systems not supporting APFSIOC_NEXT_LINK will skip replicating events for sibling hardlinks
1025 if (iret == 0 && next_linkid != 0) {
1026 fsid0 = link_vp->v_mount->mnt_vfsstat.f_fsid.val[0];
1027 ut->uu_flag |= UT_KERN_RAGE_VNODES;
1028 if (!do_cache_reset) {
1029 do_cache_reset = true;
1030 }
1031 if ((iret = fsgetpath_internal(ctx, fsid0, next_linkid, MAXPATHLEN, path_override, FSOPT_NOFIRMLINKPATH, &path_override_len)) == 0) {
1032 orig_linkcount--;
1033 ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1034
1035 if (orig_linkcount >= 0) {
1036 release_event_ref(kfse);
1037 goto restart;
1038 }
1039 } else {
1040 // failed to get override path
1041 // encountered a broken link or the linkid has been deleted before retrieving the path
1042 orig_linkcount--;
1043 ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1044
1045 if (orig_linkcount >= 0) {
1046 goto clean_up;
1047 }
1048 }
1049 }
1050 }
1051 }
1052 }
1053
1054 if (link_name) {
1055 /*
1056 * If we call fsgetpath on all the links, it will set the link origin cache
1057 * to the last link that the path was obtained for.
1058 * To restore the the original link id cache in APFS we need to issue a
1059 * lookup on the original directory + name for the link.
1060 */
1061 if (do_cache_reset) {
1062 vnode_t dvp = NULLVP;
1063
1064 if ((ret = VFS_VGET(link_vp->v_mount, (ino64_t)link_parentid, &dvp, vfs_context_kernel())) == 0) {
1065 vnode_t lvp = NULLVP;
1066
1067 ret = vnode_lookupat(link_name, 0, &lvp, ctx, dvp);
1068 if (!ret) {
1069 vnode_put(lvp);
1070 lvp = NULLVP;
1071 }
1072 vnode_put(dvp);
1073 dvp = NULLVP;
1074 }
1075 ret = 0;
1076 }
1077 zfree(ZV_NAMEI, link_name);
1078 link_name = NULL;
1079 }
1080
1081 if (path_override) {
1082 release_pathbuff(path_override);
1083 path_override = NULL;
1084 }
1085
1086 release_event_ref(kfse);
1087
1088 return error;
1089 }
1090
1091 int
test_fse_access_granted(vnode_t vp,unsigned long type,vfs_context_t ctx)1092 test_fse_access_granted(vnode_t vp, unsigned long type, vfs_context_t ctx)
1093 {
1094 audit_token_t atoken;
1095 char *pathbuff;
1096 int error, pathbuff_len;
1097
1098 if (type == 0) {
1099 return add_fsevent(FSE_ACCESS_GRANTED, ctx,
1100 FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1101 }
1102
1103 if (type == 1) {
1104 error = vfs_context_copy_audit_token(ctx, &atoken);
1105 if (error) {
1106 return error;
1107 }
1108 return add_fsevent(FSE_ACCESS_GRANTED, ctx,
1109 FSE_ARG_VNODE, vp, FSE_ARG_AUDIT_TOKEN, &atoken,
1110 FSE_ARG_DONE);
1111 }
1112
1113 if (type == 2 || type == 3) {
1114 pathbuff = get_pathbuff();
1115 pathbuff_len = MAXPATHLEN;
1116 pathbuff[0] = '\0';
1117 error = vn_getpath_no_firmlink(vp, pathbuff, &pathbuff_len);
1118 if (error) {
1119 release_pathbuff(pathbuff);
1120 return error;
1121 }
1122 if (type == 2) {
1123 error = add_fsevent(FSE_ACCESS_GRANTED, ctx,
1124 FSE_ARG_STRING, pathbuff_len, pathbuff,
1125 FSE_ARG_DONE);
1126 } else {
1127 error = add_fsevent(FSE_ACCESS_GRANTED, ctx,
1128 FSE_ARG_PATH, pathbuff, FSE_ARG_DONE);
1129 }
1130 release_pathbuff(pathbuff);
1131 return error;
1132 }
1133
1134 return ENOTSUP;
1135 }
1136
1137 static void
release_event_ref(kfs_event * kfse)1138 release_event_ref(kfs_event *kfse)
1139 {
1140 int old_refcount;
1141 kfs_event *dest = NULL;
1142 const char *path_str = NULL, *dest_path_str = NULL;
1143
1144 lock_fs_event_list();
1145
1146 old_refcount = OSAddAtomic(-1, &kfse->refcount);
1147 if (old_refcount > 1) {
1148 unlock_fs_event_list();
1149 return;
1150 }
1151
1152 if (last_event_ptr == kfse) {
1153 last_event_ptr = NULL;
1154 last_event_type = -1;
1155 last_coalesced_time = 0;
1156 }
1157
1158 if (kfse->refcount < 0) {
1159 panic("release_event_ref: bogus kfse refcount %d", kfse->refcount);
1160 }
1161
1162 assert(kfse->refcount == 0);
1163 assert(kfse->type != FSE_INVALID);
1164
1165 //
1166 // Get pointers to all the things so we can free without
1167 // holding any locks.
1168 //
1169 if (kfse->type != FSE_DOCID_CREATED &&
1170 kfse->type != FSE_DOCID_CHANGED &&
1171 kfse->type != FSE_ACCESS_GRANTED) {
1172 path_str = kfse->regular_event.str;
1173
1174 dest = kfse->regular_event.dest;
1175 if (dest != NULL) {
1176 assert(dest->type != FSE_INVALID);
1177 if (OSAddAtomic(-1,
1178 &kfse->regular_event.dest->refcount) == 1) {
1179 dest_path_str = dest->regular_event.str;
1180 } else {
1181 dest = NULL;
1182 }
1183 }
1184 }
1185
1186 if (dest != NULL) {
1187 if (dest->flags & KFSE_ON_LIST) {
1188 num_events_outstanding--;
1189 LIST_REMOVE(dest, kevent_list);
1190 }
1191 }
1192
1193 if (kfse->flags & KFSE_ON_LIST) {
1194 num_events_outstanding--;
1195 LIST_REMOVE(kfse, kevent_list);
1196 if (kfse->type == FSE_RENAME) {
1197 num_pending_rename--;
1198 }
1199 }
1200
1201 unlock_fs_event_list();
1202
1203 zfree(event_zone, kfse);
1204 if (dest != NULL) {
1205 zfree(event_zone, dest);
1206 }
1207
1208 if (path_str != NULL) {
1209 vfs_removename(path_str);
1210 }
1211 if (dest_path_str != NULL) {
1212 vfs_removename(dest_path_str);
1213 }
1214 }
1215
//
// Entitlement string checked by watcher_is_entitled(); a task holding it
// is treated as entitled to watch /dev/fsevents.
//
#define FSEVENTS_WATCHER_ENTITLEMENT            \
	"com.apple.private.vfs.fsevents-watcher"

//
// We restrict this for two reasons:
//
// 1- So that naive processes don't get this firehose by default.
//
// 2- Because this event, when delivered to watchers, includes the
//    audit token of the process granted the access, and we don't
//    want to leak that to random watchers.
//
#define FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT \
	"com.apple.private.vfs.fsevents-access-granted-watcher"
1230
1231 static bool
watcher_is_entitled(task_t task)1232 watcher_is_entitled(task_t task)
1233 {
1234 //
1235 // We consider a process to be entitled to watch /dev/fsevents
1236 // if it has either FSEVENTS_WATCHER_ENTITLEMENT or
1237 // FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT.
1238 //
1239 return !!(IOTaskHasEntitlement(task, FSEVENTS_WATCHER_ENTITLEMENT) ||
1240 IOTaskHasEntitlement(task,
1241 FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT));
1242 }
1243
1244 static bool
watcher_is_entitled_for_access_granted(task_t task)1245 watcher_is_entitled_for_access_granted(task_t task)
1246 {
1247 return !!IOTaskHasEntitlement(task,
1248 FSEVENTS_ACCESS_GRANTED_WATCHER_ENTITLEMENT);
1249 }
1250
1251 static int
add_watcher(int8_t * event_list,int32_t num_events,int32_t eventq_size,fs_event_watcher ** watcher_out,void * fseh)1252 add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_event_watcher **watcher_out, void *fseh)
1253 {
1254 int i;
1255 fs_event_watcher *watcher;
1256
1257 if (eventq_size <= 0 || eventq_size > 100 * max_kfs_events) {
1258 eventq_size = max_kfs_events;
1259 }
1260
1261 // If the watcher wants FSE_ACCESS_GRANTED, ensure it has the
1262 // correct entitlement. If not, just silently drop that event.
1263 if (num_events > FSE_ACCESS_GRANTED &&
1264 event_list[FSE_ACCESS_GRANTED] != FSE_IGNORE &&
1265 !watcher_is_entitled_for_access_granted(current_task())) {
1266 event_list[FSE_ACCESS_GRANTED] = FSE_IGNORE;
1267 }
1268
1269 // Note: the event_queue follows the fs_event_watcher struct
1270 // in memory so we only have to do one allocation
1271 watcher = kalloc_type(fs_event_watcher, kfs_event *, eventq_size, Z_WAITOK);
1272 if (watcher == NULL) {
1273 return ENOMEM;
1274 }
1275
1276 watcher->event_list = event_list;
1277 watcher->num_events = num_events;
1278 watcher->devices_not_to_watch = NULL;
1279 watcher->num_devices = 0;
1280 watcher->flags = 0;
1281 watcher->event_queue = (kfs_event **)&watcher[1];
1282 watcher->eventq_size = eventq_size;
1283 watcher->rd = 0;
1284 watcher->wr = 0;
1285 watcher->blockers = 0;
1286 watcher->num_readers = 0;
1287 watcher->max_event_id = 0;
1288 watcher->fseh = fseh;
1289 watcher->pid = proc_selfpid();
1290 proc_selfname(watcher->proc_name, sizeof(watcher->proc_name));
1291
1292 watcher->num_dropped = 0; // XXXdbg - debugging
1293
1294 if (watcher_is_entitled(current_task())) {
1295 watcher->flags |= WATCHER_APPLE_SYSTEM_SERVICE;
1296 } else {
1297 printf("fsevents: watcher %s (pid: %d) - Using /dev/fsevents directly is unsupported. Migrate to FSEventsFramework\n",
1298 watcher->proc_name, watcher->pid);
1299 }
1300
1301 lock_watch_table();
1302
1303 // find a slot for the new watcher
1304 for (i = 0; i < MAX_WATCHERS; i++) {
1305 if (watcher_table[i] == NULL) {
1306 watcher->my_id = i;
1307 watcher_table[i] = watcher;
1308 break;
1309 }
1310 }
1311
1312 if (i >= MAX_WATCHERS) {
1313 printf("fsevents: too many watchers!\n");
1314 unlock_watch_table();
1315 kfree_type(fs_event_watcher, kfs_event *, watcher->eventq_size, watcher);
1316 return ENOSPC;
1317 }
1318
1319 // now update the global list of who's interested in
1320 // events of a particular type...
1321 for (i = 0; i < num_events; i++) {
1322 if (event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1323 fs_event_type_watchers[i]++;
1324 }
1325 }
1326
1327 unlock_watch_table();
1328
1329 *watcher_out = watcher;
1330
1331 return 0;
1332 }
1333
1334
1335
1336 static void
remove_watcher(fs_event_watcher * target)1337 remove_watcher(fs_event_watcher *target)
1338 {
1339 int i, j, counter = 0;
1340 fs_event_watcher *watcher;
1341 kfs_event *kfse;
1342
1343 lock_watch_table();
1344
1345 for (j = 0; j < MAX_WATCHERS; j++) {
1346 watcher = watcher_table[j];
1347 if (watcher != target) {
1348 continue;
1349 }
1350
1351 watcher_table[j] = NULL;
1352
1353 for (i = 0; i < watcher->num_events; i++) {
1354 if (watcher->event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1355 fs_event_type_watchers[i]--;
1356 }
1357 }
1358
1359 if (watcher->flags & WATCHER_CLOSING) {
1360 unlock_watch_table();
1361 return;
1362 }
1363
1364 // printf("fsevents: removing watcher %p (rd %d wr %d num_readers %d flags 0x%x)\n", watcher, watcher->rd, watcher->wr, watcher->num_readers, watcher->flags);
1365 watcher->flags |= WATCHER_CLOSING;
1366 OSAddAtomic(1, &watcher->num_readers);
1367
1368 unlock_watch_table();
1369
1370 while (watcher->num_readers > 1 && counter++ < 5000) {
1371 lock_watch_table();
1372 fsevents_wakeup(watcher); // in case they're asleep
1373 unlock_watch_table();
1374
1375 tsleep(watcher, PRIBIO, "fsevents-close", 1);
1376 }
1377 if (counter++ >= 5000) {
1378 // printf("fsevents: close: still have readers! (%d)\n", watcher->num_readers);
1379 panic("fsevents: close: still have readers! (%d)", watcher->num_readers);
1380 }
1381
1382 // drain the event_queue
1383
1384 lck_rw_lock_exclusive(&event_handling_lock);
1385 while (watcher->rd != watcher->wr) {
1386 kfse = watcher->event_queue[watcher->rd];
1387 watcher->event_queue[watcher->rd] = NULL;
1388 watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1389 OSSynchronizeIO();
1390 if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1391 release_event_ref(kfse);
1392 }
1393 }
1394 lck_rw_unlock_exclusive(&event_handling_lock);
1395
1396 kfree_data(watcher->event_list, watcher->num_events * sizeof(int8_t));
1397 kfree_data(watcher->devices_not_to_watch, watcher->num_devices * sizeof(dev_t));
1398 kfree_type(fs_event_watcher, kfs_event *, watcher->eventq_size, watcher);
1399 return;
1400 }
1401
1402 unlock_watch_table();
1403 }
1404
1405
// Delay, in milliseconds, used by schedule_event_wakeup() when arming the
// delivery timer.
#define EVENT_DELAY_IN_MS   10
// Thread call that runs delayed_event_delivery(); allocated on first use
// by schedule_event_wakeup().
static thread_call_t event_delivery_timer = NULL;
// Set to 1 when the timer is armed and back to 0 by
// delayed_event_delivery() once it has run.
static int timer_set = 0;
1409
1410
1411 static void
delayed_event_delivery(__unused void * param0,__unused void * param1)1412 delayed_event_delivery(__unused void *param0, __unused void *param1)
1413 {
1414 int i;
1415
1416 lock_watch_table();
1417
1418 for (i = 0; i < MAX_WATCHERS; i++) {
1419 if (watcher_table[i] != NULL && watcher_table[i]->rd != watcher_table[i]->wr) {
1420 fsevents_wakeup(watcher_table[i]);
1421 }
1422 }
1423
1424 timer_set = 0;
1425
1426 unlock_watch_table();
1427 }
1428
1429
1430 //
1431 // The watch table must be locked before calling this function.
1432 //
1433 static void
schedule_event_wakeup(void)1434 schedule_event_wakeup(void)
1435 {
1436 uint64_t deadline;
1437
1438 if (event_delivery_timer == NULL) {
1439 event_delivery_timer = thread_call_allocate((thread_call_func_t)delayed_event_delivery, NULL);
1440 }
1441
1442 clock_interval_to_deadline(EVENT_DELAY_IN_MS, 1000 * 1000, &deadline);
1443
1444 thread_call_enter_delayed(event_delivery_timer, deadline);
1445 timer_set = 1;
1446 }
1447
1448
1449
1450 #define MAX_NUM_PENDING 16
1451
1452 //
1453 // NOTE: the watch table must be locked before calling
1454 // this routine.
1455 //
1456 static int
watcher_add_event(fs_event_watcher * watcher,kfs_event * kfse)1457 watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse)
1458 {
1459 if (kfse->abstime > watcher->max_event_id) {
1460 watcher->max_event_id = kfse->abstime;
1461 }
1462
1463 if (((watcher->wr + 1) % watcher->eventq_size) == watcher->rd) {
1464 watcher->flags |= WATCHER_DROPPED_EVENTS;
1465 fsevents_wakeup(watcher);
1466 return ENOSPC;
1467 }
1468
1469 OSAddAtomic(1, &kfse->refcount);
1470 watcher->event_queue[watcher->wr] = kfse;
1471 OSSynchronizeIO();
1472 watcher->wr = (watcher->wr + 1) % watcher->eventq_size;
1473
1474 //
1475 // wake up the watcher if there are more than MAX_NUM_PENDING events.
1476 // otherwise schedule a timer (if one isn't already set) which will
1477 // send any pending events if no more are received in the next
1478 // EVENT_DELAY_IN_MS milli-seconds.
1479 //
1480 int32_t num_pending = 0;
1481 if (watcher->rd < watcher->wr) {
1482 num_pending = watcher->wr - watcher->rd;
1483 }
1484
1485 if (watcher->rd > watcher->wr) {
1486 num_pending = watcher->wr + watcher->eventq_size - watcher->rd;
1487 }
1488
1489 if (num_pending > (watcher->eventq_size * 3 / 4) && !(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE)) {
1490 /* Non-Apple Service is falling behind, start dropping events for this process */
1491 lck_rw_lock_exclusive(&event_handling_lock);
1492 while (watcher->rd != watcher->wr) {
1493 kfse = watcher->event_queue[watcher->rd];
1494 watcher->event_queue[watcher->rd] = NULL;
1495 watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1496 OSSynchronizeIO();
1497 if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1498 release_event_ref(kfse);
1499 }
1500 }
1501 watcher->flags |= WATCHER_DROPPED_EVENTS;
1502 lck_rw_unlock_exclusive(&event_handling_lock);
1503
1504 printf("fsevents: watcher falling behind: %s (pid: %d) rd: %4d wr: %4d q_size: %4d flags: 0x%x\n",
1505 watcher->proc_name, watcher->pid, watcher->rd, watcher->wr,
1506 watcher->eventq_size, watcher->flags);
1507
1508 fsevents_wakeup(watcher);
1509 } else if (num_pending > MAX_NUM_PENDING) {
1510 fsevents_wakeup(watcher);
1511 } else if (timer_set == 0) {
1512 schedule_event_wakeup();
1513 }
1514
1515 return 0;
1516 }
1517
/*
 * Append one tagged argument to the staging buffer buff, flushing the
 * buffer to uio whenever it runs out of room.
 *
 * The record written is a uint16_t type, a uint16_t length (the low 16
 * bits of size), then size bytes copied from data.
 *
 *   _buff_idx - in/out: current fill offset within buff
 *   buff_sz   - capacity of buff
 *
 * Returns 0 on success, ENOSPC if a flush would not fit in what is left of
 * uio, or the error returned by uiomove().  *_buff_idx is updated on every
 * path.
 */
static int
fill_buff(uint16_t type, int32_t size, const void *data,
    char *buff, int32_t *_buff_idx, int32_t buff_sz,
    struct uio *uio)
{
	const char *src = (const char *)data;
	int32_t pos = *_buff_idx;
	int32_t error = 0;
	int32_t chunk;
	uint16_t len16;

	//
	// Make sure the header plus at least one byte of payload fits (that
	// is what the +1 is for); otherwise flush what we have first.
	//
	if ((buff_sz - pos) <= (int)(2 * sizeof(uint16_t) + 1)) {
		if (pos > uio_resid(uio)) {
			error = ENOSPC;
			goto out;
		}

		error = uiomove(buff, pos, uio);
		if (error) {
			goto out;
		}
		pos = 0;
	}

	// header: type followed by the 16-bit length
	memcpy(&buff[pos], &type, sizeof(uint16_t));
	pos += sizeof(uint16_t);

	len16 = size & 0xffff;
	memcpy(&buff[pos], &len16, sizeof(uint16_t));
	pos += sizeof(uint16_t);

	// payload: copy in pieces no larger than the free space, flushing
	// whenever what is left will not fit in the remaining room
	while (size > 0) {
		chunk = buff_sz - pos;
		if (size < chunk) {
			chunk = size;
		}
		memcpy(&buff[pos], src, chunk);

		src += chunk;
		pos += chunk;
		size -= chunk;

		if (size > (buff_sz - pos)) {
			if (pos > uio_resid(uio)) {
				error = ENOSPC;
				goto out;
			}
			error = uiomove(buff, pos, uio);
			if (error) {
				goto out;
			}
			pos = 0;
		}

		if (chunk == 0) {   // just in case...
			break;
		}
	}

out:
	*_buff_idx = pos;

	return error;
}
1583
1584
1585 static int copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio) __attribute__((noinline));
1586
1587 static int
copy_out_kfse(fs_event_watcher * watcher,kfs_event * kfse,struct uio * uio)1588 copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio)
1589 {
1590 int error;
1591 uint16_t tmp16;
1592 int32_t type;
1593 kfs_event *cur;
1594 char evbuff[512];
1595 int evbuff_idx = 0;
1596
1597 if (kfse->type == FSE_INVALID) {
1598 panic("fsevents: copy_out_kfse: asked to copy out an invalid event (kfse %p, refcount %d)", kfse, kfse->refcount);
1599 }
1600
1601 if (kfse->flags & KFSE_BEING_CREATED) {
1602 return 0;
1603 }
1604
1605 if (((kfse->type == FSE_RENAME) || (kfse->type == FSE_CLONE)) &&
1606 kfse->regular_event.dest == NULL) {
1607 //
1608 // This can happen if an event gets recycled but we had a
1609 // pointer to it in our event queue. The event is the
1610 // destination of a rename or clone which we'll process
1611 // separately (that is, another kfse points to this one
1612 // so it's ok to skip this guy because we'll process it
1613 // when we process the other one)
1614 error = 0;
1615 goto get_out;
1616 }
1617
1618 if (watcher->flags & WATCHER_WANTS_EXTENDED_INFO) {
1619 type = (kfse->type & 0xfff);
1620
1621 if (kfse->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1622 type |= (FSE_CONTAINS_DROPPED_EVENTS << FSE_FLAG_SHIFT);
1623 } else if (kfse->flags & KFSE_COMBINED_EVENTS) {
1624 type |= (FSE_COMBINED_EVENTS << FSE_FLAG_SHIFT);
1625 }
1626 } else {
1627 type = (int32_t)kfse->type;
1628 }
1629
1630 // copy out the type of the event
1631 memcpy(evbuff, &type, sizeof(int32_t));
1632 evbuff_idx += sizeof(int32_t);
1633
1634 // copy out the pid of the person that generated the event
1635 memcpy(&evbuff[evbuff_idx], &kfse->pid, sizeof(pid_t));
1636 evbuff_idx += sizeof(pid_t);
1637
1638 cur = kfse;
1639
1640 copy_again:
1641
1642 if (kfse->type == FSE_DOCID_CHANGED ||
1643 kfse->type == FSE_DOCID_CREATED) {
1644 dev_t dev = cur->docid_event.dev;
1645 ino64_t src_ino = cur->docid_event.src_ino;
1646 ino64_t dst_ino = cur->docid_event.dst_ino;
1647 uint64_t docid = cur->docid_event.docid;
1648
1649 error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev, evbuff,
1650 &evbuff_idx, sizeof(evbuff), uio);
1651 if (error != 0) {
1652 goto get_out;
1653 }
1654
1655 error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &src_ino,
1656 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1657 if (error != 0) {
1658 goto get_out;
1659 }
1660
1661 error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &dst_ino,
1662 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1663 if (error != 0) {
1664 goto get_out;
1665 }
1666
1667 error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &docid,
1668 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1669 if (error != 0) {
1670 goto get_out;
1671 }
1672
1673 goto done;
1674 }
1675
1676 if (kfse->type == FSE_UNMOUNT_PENDING) {
1677 dev_t dev = cur->regular_event.dev;
1678
1679 error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev,
1680 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1681 if (error != 0) {
1682 goto get_out;
1683 }
1684
1685 goto done;
1686 }
1687
1688 if (kfse->type == FSE_ACCESS_GRANTED) {
1689 //
1690 // KFSE_CONTAINS_DROPPED_EVENTS will be set if either
1691 // the path or audit token are bogus; don't copy out
1692 // either in that case.
1693 //
1694 if (cur->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1695 goto done;
1696 }
1697 error = fill_buff(FSE_ARG_STRING,
1698 cur->access_granted_event.len,
1699 cur->access_granted_event.str,
1700 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1701 if (error != 0) {
1702 goto get_out;
1703 }
1704 error = fill_buff(FSE_ARG_AUDIT_TOKEN,
1705 sizeof(cur->access_granted_event.audit_token),
1706 &cur->access_granted_event.audit_token,
1707 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1708 if (error != 0) {
1709 goto get_out;
1710 }
1711
1712 goto done;
1713 }
1714
1715 if (cur->regular_event.str == NULL ||
1716 cur->regular_event.str[0] == '\0') {
1717 printf("copy_out_kfse:2: empty/short path (%s)\n",
1718 cur->regular_event.str);
1719 error = fill_buff(FSE_ARG_STRING, 2, "/", evbuff, &evbuff_idx,
1720 sizeof(evbuff), uio);
1721 } else {
1722 error = fill_buff(FSE_ARG_STRING, cur->regular_event.len,
1723 cur->regular_event.str, evbuff, &evbuff_idx,
1724 sizeof(evbuff), uio);
1725 }
1726 if (error != 0) {
1727 goto get_out;
1728 }
1729
1730 if (cur->regular_event.dev == 0 && cur->regular_event.ino == 0) {
1731 // this happens when a rename event happens and the
1732 // destination of the rename did not previously exist.
1733 // it thus has no other file info so skip copying out
1734 // the stuff below since it isn't initialized
1735 goto done;
1736 }
1737
1738
1739 if (watcher->flags & WATCHER_WANTS_COMPACT_EVENTS) {
1740 // We rely on the layout of the "regular_event"
1741 // structure being the same as fse_info in order
1742 // to speed up this copy. The nlink field in
1743 // fse_info is not included.
1744 error = fill_buff(FSE_ARG_FINFO, KFSE_INFO_COPYSIZE,
1745 &cur->regular_event, evbuff, &evbuff_idx,
1746 sizeof(evbuff), uio);
1747 if (error != 0) {
1748 goto get_out;
1749 }
1750 } else {
1751 error = fill_buff(FSE_ARG_DEV, sizeof(dev_t),
1752 &cur->regular_event.dev, evbuff, &evbuff_idx,
1753 sizeof(evbuff), uio);
1754 if (error != 0) {
1755 goto get_out;
1756 }
1757
1758 error = fill_buff(FSE_ARG_INO, sizeof(ino64_t),
1759 &cur->regular_event.ino, evbuff, &evbuff_idx,
1760 sizeof(evbuff), uio);
1761 if (error != 0) {
1762 goto get_out;
1763 }
1764
1765 error = fill_buff(FSE_ARG_MODE, sizeof(int32_t),
1766 &cur->regular_event.mode, evbuff, &evbuff_idx,
1767 sizeof(evbuff), uio);
1768 if (error != 0) {
1769 goto get_out;
1770 }
1771
1772 error = fill_buff(FSE_ARG_UID, sizeof(uid_t),
1773 &cur->regular_event.uid, evbuff, &evbuff_idx,
1774 sizeof(evbuff), uio);
1775 if (error != 0) {
1776 goto get_out;
1777 }
1778
1779 error = fill_buff(FSE_ARG_GID, sizeof(gid_t),
1780 &cur->regular_event.document_id, evbuff, &evbuff_idx,
1781 sizeof(evbuff), uio);
1782 if (error != 0) {
1783 goto get_out;
1784 }
1785 }
1786
1787 if (cur->regular_event.dest) {
1788 cur = cur->regular_event.dest;
1789 goto copy_again;
1790 }
1791
1792 done:
1793 // very last thing: the time stamp
1794 error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &cur->abstime,
1795 evbuff, &evbuff_idx, sizeof(evbuff), uio);
1796 if (error != 0) {
1797 goto get_out;
1798 }
1799
1800 // check if the FSE_ARG_DONE will fit
1801 if (sizeof(uint16_t) > sizeof(evbuff) - evbuff_idx) {
1802 if (evbuff_idx > uio_resid(uio)) {
1803 error = ENOSPC;
1804 goto get_out;
1805 }
1806 error = uiomove(evbuff, evbuff_idx, uio);
1807 if (error) {
1808 goto get_out;
1809 }
1810 evbuff_idx = 0;
1811 }
1812
1813 tmp16 = FSE_ARG_DONE;
1814 memcpy(&evbuff[evbuff_idx], &tmp16, sizeof(uint16_t));
1815 evbuff_idx += sizeof(uint16_t);
1816
1817 // flush any remaining data in the buffer (and hopefully
1818 // in most cases this is the only uiomove we'll do)
1819 if (evbuff_idx > uio_resid(uio)) {
1820 error = ENOSPC;
1821 } else {
1822 error = uiomove(evbuff, evbuff_idx, uio);
1823 }
1824
1825 get_out:
1826
1827 return error;
1828 }
1829
1830
1831
1832 static int
fmod_watch(fs_event_watcher * watcher,struct uio * uio)1833 fmod_watch(fs_event_watcher *watcher, struct uio *uio)
1834 {
1835 int error = 0;
1836 user_ssize_t last_full_event_resid;
1837 kfs_event *kfse;
1838 uint16_t tmp16;
1839 int skipped;
1840
1841 last_full_event_resid = uio_resid(uio);
1842
1843 // need at least 2048 bytes of space (maxpathlen + 1 event buf)
1844 if (uio_resid(uio) < 2048 || watcher == NULL) {
1845 return EINVAL;
1846 }
1847
1848 if (watcher->flags & WATCHER_CLOSING) {
1849 return 0;
1850 }
1851
1852 if (OSAddAtomic(1, &watcher->num_readers) != 0) {
1853 // don't allow multiple threads to read from the fd at the same time
1854 OSAddAtomic(-1, &watcher->num_readers);
1855 return EAGAIN;
1856 }
1857
1858 restart_watch:
1859 if (watcher->rd == watcher->wr) {
1860 if (watcher->flags & WATCHER_CLOSING) {
1861 OSAddAtomic(-1, &watcher->num_readers);
1862 return 0;
1863 }
1864 OSAddAtomic(1, &watcher->blockers);
1865
1866 // there's nothing to do, go to sleep
1867 error = tsleep((caddr_t)watcher, PUSER | PCATCH, "fsevents_empty", 0);
1868
1869 OSAddAtomic(-1, &watcher->blockers);
1870
1871 if (error != 0 || (watcher->flags & WATCHER_CLOSING)) {
1872 OSAddAtomic(-1, &watcher->num_readers);
1873 return error;
1874 }
1875 }
1876
1877 // if we dropped events, return that as an event first
1878 if (watcher->flags & WATCHER_DROPPED_EVENTS) {
1879 int32_t val = FSE_EVENTS_DROPPED;
1880
1881 error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
1882 if (error == 0) {
1883 val = 0; // a fake pid
1884 error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
1885
1886 tmp16 = FSE_ARG_DONE; // makes it a consistent msg
1887 error = uiomove((caddr_t)&tmp16, sizeof(int16_t), uio);
1888
1889 last_full_event_resid = uio_resid(uio);
1890 }
1891
1892 if (error) {
1893 OSAddAtomic(-1, &watcher->num_readers);
1894 return error;
1895 }
1896
1897 watcher->flags &= ~WATCHER_DROPPED_EVENTS;
1898 }
1899
1900 skipped = 0;
1901
1902 lck_rw_lock_shared(&event_handling_lock);
1903 while (uio_resid(uio) > 0 && watcher->rd != watcher->wr) {
1904 if (watcher->flags & WATCHER_CLOSING) {
1905 break;
1906 }
1907
1908 //
1909 // check if the event is something of interest to us
1910 // (since it may have been recycled/reused and changed
1911 // its type or which device it is for)
1912 //
1913 kfse = watcher->event_queue[watcher->rd];
1914 if (!kfse || kfse->type == FSE_INVALID || kfse->type >= watcher->num_events || kfse->refcount < 1) {
1915 break;
1916 }
1917
1918 if (watcher->event_list[kfse->type] == FSE_REPORT) {
1919 if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) &&
1920 kfse->type != FSE_DOCID_CREATED &&
1921 kfse->type != FSE_DOCID_CHANGED &&
1922 kfse->type != FSE_ACCESS_GRANTED &&
1923 is_ignored_directory(kfse->regular_event.str)) {
1924 // If this is not an Apple System Service, skip specified directories
1925 // radar://12034844
1926 error = 0;
1927 skipped = 1;
1928 } else {
1929 skipped = 0;
1930 if (last_event_ptr == kfse) {
1931 last_event_ptr = NULL;
1932 last_event_type = -1;
1933 last_coalesced_time = 0;
1934 }
1935 error = copy_out_kfse(watcher, kfse, uio);
1936 if (error != 0) {
1937 // if an event won't fit or encountered an error while
1938 // we were copying it out, then backup to the last full
1939 // event and just bail out. if the error was ENOENT
1940 // then we can continue regular processing, otherwise
1941 // we should unlock things and return.
1942 uio_setresid(uio, last_full_event_resid);
1943 if (error != ENOENT) {
1944 lck_rw_unlock_shared(&event_handling_lock);
1945 error = 0;
1946 goto get_out;
1947 }
1948 }
1949
1950 last_full_event_resid = uio_resid(uio);
1951 }
1952 }
1953
1954 watcher->event_queue[watcher->rd] = NULL;
1955 watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1956 OSSynchronizeIO();
1957 release_event_ref(kfse);
1958 }
1959 lck_rw_unlock_shared(&event_handling_lock);
1960
1961 if (skipped && error == 0) {
1962 goto restart_watch;
1963 }
1964
1965 get_out:
1966 OSAddAtomic(-1, &watcher->num_readers);
1967
1968 return error;
1969 }
1970
1971
1972 //
1973 // Shoo watchers away from a volume that's about to be unmounted
1974 // (so that it can be cleanly unmounted).
1975 //
1976 void
fsevent_unmount(__unused struct mount * mp,__unused vfs_context_t ctx)1977 fsevent_unmount(__unused struct mount *mp, __unused vfs_context_t ctx)
1978 {
1979 #if !defined(XNU_TARGET_OS_OSX)
1980 dev_t dev = mp->mnt_vfsstat.f_fsid.val[0];
1981 int error, waitcount = 0;
1982 struct timespec ts = {.tv_sec = 1, .tv_nsec = 0};
1983
1984 // wait for any other pending unmounts to complete
1985 lock_watch_table();
1986 while (fsevent_unmount_dev != 0) {
1987 error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_wait", &ts);
1988 if (error == EWOULDBLOCK) {
1989 error = 0;
1990 }
1991 if (!error && (++waitcount >= 10)) {
1992 error = EWOULDBLOCK;
1993 printf("timeout waiting to signal unmount pending for dev %d (fsevent_unmount_dev %d)\n", dev, fsevent_unmount_dev);
1994 }
1995 if (error) {
1996 // there's a problem, bail out
1997 unlock_watch_table();
1998 return;
1999 }
2000 }
2001 if (fs_event_type_watchers[FSE_UNMOUNT_PENDING] == 0) {
2002 // nobody watching for unmount pending events
2003 unlock_watch_table();
2004 return;
2005 }
2006 // this is now the current unmount pending
2007 fsevent_unmount_dev = dev;
2008 fsevent_unmount_ack_count = fs_event_type_watchers[FSE_UNMOUNT_PENDING];
2009 unlock_watch_table();
2010
2011 // send an event to notify the watcher they need to get off the mount
2012 error = add_fsevent(FSE_UNMOUNT_PENDING, ctx, FSE_ARG_DEV, dev, FSE_ARG_DONE);
2013
2014 // wait for acknowledgment(s) (give up if it takes too long)
2015 lock_watch_table();
2016 waitcount = 0;
2017 while (fsevent_unmount_dev == dev) {
2018 error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_pending", &ts);
2019 if (error == EWOULDBLOCK) {
2020 error = 0;
2021 }
2022 if (!error && (++waitcount >= 10)) {
2023 error = EWOULDBLOCK;
2024 printf("unmount pending ack timeout for dev %d\n", dev);
2025 }
2026 if (error) {
2027 // there's a problem, bail out
2028 if (fsevent_unmount_dev == dev) {
2029 fsevent_unmount_dev = 0;
2030 fsevent_unmount_ack_count = 0;
2031 }
2032 wakeup((caddr_t)&fsevent_unmount_dev);
2033 break;
2034 }
2035 }
2036 unlock_watch_table();
2037 #endif /* ! XNU_TARGET_OS_OSX */
2038 }
2039
2040
//
// /dev/fsevents device code
//
static int fsevents_installed = 0;

// Per-open-file state for an fsevents descriptor.
typedef struct fsevent_handle {
	UInt32            flags;    // FSEH_* bits; FSEH_CLOSING is set by fseventsf_close()
	SInt32            active;   // count of ioctl calls currently in progress on this handle
	fs_event_watcher *watcher;  // the watcher that read/ioctl/select operate on
	struct klist      knotes;
	struct selinfo    si;       // select state recorded by fseventsf_select()
} fsevent_handle;

// Set in fsevent_handle.flags once the handle is being closed; ioctls
// arriving after that return immediately.
#define FSEH_CLOSING   0x0001
2055
2056 static int
fseventsf_read(struct fileproc * fp,struct uio * uio,__unused int flags,__unused vfs_context_t ctx)2057 fseventsf_read(struct fileproc *fp, struct uio *uio,
2058 __unused int flags, __unused vfs_context_t ctx)
2059 {
2060 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2061 int error;
2062
2063 error = fmod_watch(fseh->watcher, uio);
2064
2065 return error;
2066 }
2067
2068
//
// Argument blocks for the device-filter ioctl, one layout for 32-bit
// callers and one for 64-bit callers.  "devices" is the user-space
// address of an array of num_devices dev_t values.  Member alignment is
// capped at 4 bytes for both layouts.
//
#pragma pack(push, 4)
typedef struct fsevent_dev_filter_args32 {
	uint32_t            num_devices;
	user32_addr_t       devices;
} fsevent_dev_filter_args32;
typedef struct fsevent_dev_filter_args64 {
	uint32_t            num_devices;
	user64_addr_t       devices;
} fsevent_dev_filter_args64;
#pragma pack(pop)

// Same ioctl group/number for both; the commands differ only through the
// size of the argument structure encoded by _IOW().
#define FSEVENTS_DEVICE_FILTER_32       _IOW('s', 100, fsevent_dev_filter_args32)
#define FSEVENTS_DEVICE_FILTER_64       _IOW('s', 100, fsevent_dev_filter_args64)
2082
2083 static int
fseventsf_ioctl(struct fileproc * fp,u_long cmd,caddr_t data,vfs_context_t ctx)2084 fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx)
2085 {
2086 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2087 int ret = 0;
2088 fsevent_dev_filter_args64 *devfilt_args, _devfilt_args;
2089
2090 OSAddAtomic(1, &fseh->active);
2091 if (fseh->flags & FSEH_CLOSING) {
2092 OSAddAtomic(-1, &fseh->active);
2093 return 0;
2094 }
2095
2096 switch (cmd) {
2097 case FIONBIO:
2098 case FIOASYNC:
2099 break;
2100
2101 case FSEVENTS_WANT_COMPACT_EVENTS: {
2102 fseh->watcher->flags |= WATCHER_WANTS_COMPACT_EVENTS;
2103 break;
2104 }
2105
2106 case FSEVENTS_WANT_EXTENDED_INFO: {
2107 fseh->watcher->flags |= WATCHER_WANTS_EXTENDED_INFO;
2108 break;
2109 }
2110
2111 case FSEVENTS_GET_CURRENT_ID: {
2112 *(uint64_t *)data = fseh->watcher->max_event_id;
2113 ret = 0;
2114 break;
2115 }
2116
2117 case FSEVENTS_DEVICE_FILTER_32: {
2118 if (proc_is64bit(vfs_context_proc(ctx))) {
2119 ret = EINVAL;
2120 break;
2121 }
2122 fsevent_dev_filter_args32 *devfilt_args32 = (fsevent_dev_filter_args32 *)data;
2123
2124 devfilt_args = &_devfilt_args;
2125 memset(devfilt_args, 0, sizeof(fsevent_dev_filter_args64));
2126 devfilt_args->num_devices = devfilt_args32->num_devices;
2127 devfilt_args->devices = CAST_USER_ADDR_T(devfilt_args32->devices);
2128 goto handle_dev_filter;
2129 }
2130
2131 case FSEVENTS_DEVICE_FILTER_64:
2132 if (!proc_is64bit(vfs_context_proc(ctx))) {
2133 ret = EINVAL;
2134 break;
2135 }
2136 devfilt_args = (fsevent_dev_filter_args64 *)data;
2137
2138 handle_dev_filter:
2139 {
2140 int new_num_devices, old_num_devices = 0;
2141 dev_t *devices_not_to_watch, *tmp = NULL;
2142
2143 if (devfilt_args->num_devices > 256) {
2144 ret = EINVAL;
2145 break;
2146 }
2147
2148 new_num_devices = devfilt_args->num_devices;
2149 if (new_num_devices == 0) {
2150 lock_watch_table();
2151
2152 tmp = fseh->watcher->devices_not_to_watch;
2153 fseh->watcher->devices_not_to_watch = NULL;
2154 old_num_devices = fseh->watcher->num_devices;
2155 fseh->watcher->num_devices = new_num_devices;
2156
2157 unlock_watch_table();
2158 kfree_data(tmp, old_num_devices * sizeof(dev_t));
2159 break;
2160 }
2161
2162 devices_not_to_watch = kalloc_data(new_num_devices * sizeof(dev_t), Z_WAITOK);
2163 if (devices_not_to_watch == NULL) {
2164 ret = ENOMEM;
2165 break;
2166 }
2167
2168 ret = copyin((user_addr_t)devfilt_args->devices,
2169 (void *)devices_not_to_watch,
2170 new_num_devices * sizeof(dev_t));
2171 if (ret) {
2172 kfree_data(devices_not_to_watch, new_num_devices * sizeof(dev_t));
2173 break;
2174 }
2175
2176 lock_watch_table();
2177 old_num_devices = fseh->watcher->num_devices;
2178 fseh->watcher->num_devices = new_num_devices;
2179 tmp = fseh->watcher->devices_not_to_watch;
2180 fseh->watcher->devices_not_to_watch = devices_not_to_watch;
2181 unlock_watch_table();
2182
2183 kfree_data(tmp, old_num_devices * sizeof(dev_t));
2184
2185 break;
2186 }
2187
2188 case FSEVENTS_UNMOUNT_PENDING_ACK: {
2189 lock_watch_table();
2190 dev_t dev = *(dev_t *)data;
2191 if (fsevent_unmount_dev == dev) {
2192 if (--fsevent_unmount_ack_count <= 0) {
2193 fsevent_unmount_dev = 0;
2194 wakeup((caddr_t)&fsevent_unmount_dev);
2195 }
2196 } else {
2197 printf("unexpected unmount pending ack %d (%d)\n", dev, fsevent_unmount_dev);
2198 ret = EINVAL;
2199 }
2200 unlock_watch_table();
2201 break;
2202 }
2203
2204 default:
2205 ret = EINVAL;
2206 break;
2207 }
2208
2209 OSAddAtomic(-1, &fseh->active);
2210 return ret;
2211 }
2212
2213
2214 static int
fseventsf_select(struct fileproc * fp,int which,__unused void * wql,vfs_context_t ctx)2215 fseventsf_select(struct fileproc *fp, int which, __unused void *wql, vfs_context_t ctx)
2216 {
2217 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2218 int ready = 0;
2219
2220 if ((which != FREAD) || (fseh->watcher->flags & WATCHER_CLOSING)) {
2221 return 0;
2222 }
2223
2224
2225 // if there's nothing in the queue, we're not ready
2226 if (fseh->watcher->rd != fseh->watcher->wr) {
2227 ready = 1;
2228 }
2229
2230 if (!ready) {
2231 lock_watch_table();
2232 selrecord(vfs_context_proc(ctx), &fseh->si, wql);
2233 unlock_watch_table();
2234 }
2235
2236 return ready;
2237 }
2238
2239
#if NOTUSED
/*
 * stat handler for a cloned fsevents descriptor.  Not supported: always
 * returns ENOTSUP.  Only compiled when NOTUSED evaluates to non-zero.
 */
static int
fseventsf_stat(__unused struct fileproc *fp, __unused struct stat *sb, __unused vfs_context_t ctx)
{
	return ENOTSUP;
}
#endif /* NOTUSED */
2247
2248 static int
fseventsf_close(struct fileglob * fg,__unused vfs_context_t ctx)2249 fseventsf_close(struct fileglob *fg, __unused vfs_context_t ctx)
2250 {
2251 fsevent_handle *fseh = (struct fsevent_handle *)fg_get_data(fg);
2252 fs_event_watcher *watcher;
2253
2254 OSBitOrAtomic(FSEH_CLOSING, &fseh->flags);
2255 while (OSAddAtomic(0, &fseh->active) > 0) {
2256 tsleep((caddr_t)fseh->watcher, PRIBIO, "fsevents-close", 1);
2257 }
2258
2259 watcher = fseh->watcher;
2260 fg_set_data(fg, NULL);
2261 fseh->watcher = NULL;
2262
2263 remove_watcher(watcher);
2264 selthreadclear(&fseh->si);
2265 kfree_type(fsevent_handle, fseh);
2266
2267 return 0;
2268 }
2269
2270 static void
filt_fsevent_detach(struct knote * kn)2271 filt_fsevent_detach(struct knote *kn)
2272 {
2273 fsevent_handle *fseh = (struct fsevent_handle *)knote_kn_hook_get_raw(kn);
2274
2275 lock_watch_table();
2276
2277 KNOTE_DETACH(&fseh->knotes, kn);
2278
2279 unlock_watch_table();
2280 }
2281
2282 /*
2283 * Determine whether this knote should be active
2284 *
2285 * This is kind of subtle.
2286 * --First, notice if the vnode has been revoked: in so, override hint
2287 * --EVFILT_READ knotes are checked no matter what the hint is
2288 * --Other knotes activate based on hint.
2289 * --If hint is revoke, set special flags and activate
2290 */
2291 static int
filt_fsevent_common(struct knote * kn,struct kevent_qos_s * kev,long hint)2292 filt_fsevent_common(struct knote *kn, struct kevent_qos_s *kev, long hint)
2293 {
2294 fsevent_handle *fseh = (struct fsevent_handle *)knote_kn_hook_get_raw(kn);
2295 int activate = 0;
2296 int32_t rd, wr, amt;
2297 int64_t data = 0;
2298
2299 if (NOTE_REVOKE == hint) {
2300 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2301 activate = 1;
2302 }
2303
2304 rd = fseh->watcher->rd;
2305 wr = fseh->watcher->wr;
2306 if (rd <= wr) {
2307 amt = wr - rd;
2308 } else {
2309 amt = fseh->watcher->eventq_size - (rd - wr);
2310 }
2311
2312 switch (kn->kn_filter) {
2313 case EVFILT_READ:
2314 data = amt;
2315 activate = (data != 0);
2316 break;
2317 case EVFILT_VNODE:
2318 /* Check events this note matches against the hint */
2319 if (kn->kn_sfflags & hint) {
2320 kn->kn_fflags |= (uint32_t)hint; /* Set which event occurred */
2321 }
2322 if (kn->kn_fflags != 0) {
2323 activate = 1;
2324 }
2325 break;
2326 default:
2327 // nothing to do...
2328 break;
2329 }
2330
2331 if (activate && kev) {
2332 knote_fill_kevent(kn, kev, data);
2333 }
2334 return activate;
2335 }
2336
2337 static int
filt_fsevent(struct knote * kn,long hint)2338 filt_fsevent(struct knote *kn, long hint)
2339 {
2340 return filt_fsevent_common(kn, NULL, hint);
2341 }
2342
2343 static int
filt_fsevent_touch(struct knote * kn,struct kevent_qos_s * kev)2344 filt_fsevent_touch(struct knote *kn, struct kevent_qos_s *kev)
2345 {
2346 int res;
2347
2348 lock_watch_table();
2349
2350 /* accept new fflags/data as saved */
2351 kn->kn_sfflags = kev->fflags;
2352 kn->kn_sdata = kev->data;
2353
2354 /* restrict the current results to the (smaller?) set of new interest */
2355 /*
2356 * For compatibility with previous implementations, we leave kn_fflags
2357 * as they were before.
2358 */
2359 //kn->kn_fflags &= kev->fflags;
2360
2361 /* determine if the filter is now fired */
2362 res = filt_fsevent_common(kn, NULL, 0);
2363
2364 unlock_watch_table();
2365
2366 return res;
2367 }
2368
/*
 * kqueue f_process callback: evaluate the knote with no hint under the
 * watch-table lock, filling in `kev` when it fires.  Returns non-zero if
 * the knote is active.
 */
static int
filt_fsevent_process(struct knote *kn, struct kevent_qos_s *kev)
{
	int fired;

	lock_watch_table();
	fired = filt_fsevent_common(kn, kev, 0);
	unlock_watch_table();

	return fired;
}
2382
2383 SECURITY_READ_ONLY_EARLY(struct filterops) fsevent_filtops = {
2384 .f_isfd = 1,
2385 .f_attach = NULL,
2386 .f_detach = filt_fsevent_detach,
2387 .f_event = filt_fsevent,
2388 .f_touch = filt_fsevent_touch,
2389 .f_process = filt_fsevent_process,
2390 };
2391
2392 static int
fseventsf_kqfilter(struct fileproc * fp,struct knote * kn,__unused struct kevent_qos_s * kev)2393 fseventsf_kqfilter(struct fileproc *fp, struct knote *kn,
2394 __unused struct kevent_qos_s *kev)
2395 {
2396 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2397 int res;
2398
2399 kn->kn_filtid = EVFILTID_FSEVENT;
2400 knote_kn_hook_set_raw(kn, (void *) fseh);
2401
2402 lock_watch_table();
2403
2404 KNOTE_ATTACH(&fseh->knotes, kn);
2405
2406 /* check to see if it is fired already */
2407 res = filt_fsevent_common(kn, NULL, 0);
2408
2409 unlock_watch_table();
2410
2411 return res;
2412 }
2413
2414
2415 static int
fseventsf_drain(struct fileproc * fp,__unused vfs_context_t ctx)2416 fseventsf_drain(struct fileproc *fp, __unused vfs_context_t ctx)
2417 {
2418 int counter = 0;
2419 fsevent_handle *fseh = (struct fsevent_handle *)fp_get_data(fp);
2420
2421 // if there are people still waiting, sleep for 10ms to
2422 // let them clean up and get out of there. however we
2423 // also don't want to get stuck forever so if they don't
2424 // exit after 5 seconds we're tearing things down anyway.
2425 while (fseh->watcher->blockers && counter++ < 500) {
2426 // issue wakeup in case anyone is blocked waiting for an event
2427 // do this each time we wakeup in case the blocker missed
2428 // the wakeup due to the unprotected test of WATCHER_CLOSING
2429 // and decision to tsleep in fmod_watch... this bit of
2430 // latency is a decent tradeoff against not having to
2431 // take and drop a lock in fmod_watch
2432 lock_watch_table();
2433 fsevents_wakeup(fseh->watcher);
2434 unlock_watch_table();
2435
2436 tsleep((caddr_t)fseh->watcher, PRIBIO, "watcher-close", 1);
2437 }
2438
2439 return 0;
2440 }
2441
2442
2443 static int
fseventsopen(__unused dev_t dev,__unused int flag,__unused int mode,__unused struct proc * p)2444 fseventsopen(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2445 {
2446 if (!kauth_cred_issuser(kauth_cred_get())) {
2447 return EPERM;
2448 }
2449
2450 return 0;
2451 }
2452
2453 static int
fseventsclose(__unused dev_t dev,__unused int flag,__unused int mode,__unused struct proc * p)2454 fseventsclose(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2455 {
2456 return 0;
2457 }
2458
2459 static int
fseventsread(__unused dev_t dev,__unused struct uio * uio,__unused int ioflag)2460 fseventsread(__unused dev_t dev, __unused struct uio *uio, __unused int ioflag)
2461 {
2462 return EIO;
2463 }
2464
2465
2466 static int
parse_buffer_and_add_events(const char * buffer,size_t bufsize,vfs_context_t ctx,size_t * remainder)2467 parse_buffer_and_add_events(const char *buffer, size_t bufsize, vfs_context_t ctx, size_t *remainder)
2468 {
2469 const fse_info *finfo, *dest_finfo;
2470 const char *path, *ptr, *dest_path, *event_start = buffer;
2471 size_t path_len, dest_path_len;
2472 int type, err = 0;
2473
2474
2475 ptr = buffer;
2476 while ((ptr + sizeof(int) + sizeof(fse_info) + 1) < buffer + bufsize) {
2477 type = *(const int *)ptr;
2478 if (type < 0 || type == FSE_ACCESS_GRANTED ||
2479 type >= FSE_MAX_EVENTS) {
2480 err = EINVAL;
2481 break;
2482 }
2483
2484 ptr += sizeof(int);
2485
2486 finfo = (const fse_info *)ptr;
2487 ptr += sizeof(fse_info);
2488
2489 path = ptr;
2490 while (ptr < buffer + bufsize && *ptr != '\0') {
2491 ptr++;
2492 }
2493
2494 if (ptr >= buffer + bufsize) {
2495 break;
2496 }
2497
2498 ptr++; // advance over the trailing '\0'
2499
2500 path_len = ptr - path;
2501
2502 if (type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CLONE) {
2503 event_start = ptr; // record where the next event starts
2504
2505 err = add_fsevent(type, ctx, FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, FSE_ARG_DONE);
2506 if (err) {
2507 break;
2508 }
2509 continue;
2510 }
2511
2512 //
2513 // if we're here we have to slurp up the destination finfo
2514 // and path so that we can pass them to the add_fsevent()
2515 // call. basically it's a copy of the above code.
2516 //
2517 dest_finfo = (const fse_info *)ptr;
2518 ptr += sizeof(fse_info);
2519
2520 dest_path = ptr;
2521 while (ptr < buffer + bufsize && *ptr != '\0') {
2522 ptr++;
2523 }
2524
2525 if (ptr >= buffer + bufsize) {
2526 break;
2527 }
2528
2529 ptr++; // advance over the trailing '\0'
2530 event_start = ptr; // record where the next event starts
2531
2532 dest_path_len = ptr - dest_path;
2533 //
2534 // If the destination inode number is non-zero, generate a rename
2535 // with both source and destination FSE_ARG_FINFO. Otherwise generate
2536 // a rename with only one FSE_ARG_FINFO. If you need to inject an
2537 // exchange with an inode of zero, just make that inode (and its path)
2538 // come in as the first one, not the second.
2539 //
2540 if (dest_finfo->ino) {
2541 err = add_fsevent(type, ctx,
2542 FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2543 FSE_ARG_STRING, dest_path_len, dest_path, FSE_ARG_FINFO, dest_finfo,
2544 FSE_ARG_DONE);
2545 } else {
2546 err = add_fsevent(type, ctx,
2547 FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2548 FSE_ARG_STRING, dest_path_len, dest_path,
2549 FSE_ARG_DONE);
2550 }
2551
2552 if (err) {
2553 break;
2554 }
2555 }
2556
2557 // if the last event wasn't complete, set the remainder
2558 // to be the last event start boundary.
2559 //
2560 *remainder = (long)((buffer + bufsize) - event_start);
2561
2562 return err;
2563 }
2564
2565
2566 //
2567 // Note: this buffer size can not ever be less than
2568 // 2*MAXPATHLEN + 2*sizeof(fse_info) + sizeof(int)
2569 // because that is the max size for a single event.
2570 // I made it 4k to be a "nice" size. making it
2571 // smaller is not a good idea.
2572 //
2573 #define WRITE_BUFFER_SIZE 4096
2574 static char *write_buffer = NULL;
2575
2576 static int
fseventswrite(__unused dev_t dev,struct uio * uio,__unused int ioflag)2577 fseventswrite(__unused dev_t dev, struct uio *uio, __unused int ioflag)
2578 {
2579 int error = 0;
2580 size_t count, offset = 0, remainder = 0;
2581 vfs_context_t ctx = vfs_context_current();
2582
2583 lck_mtx_lock(&event_writer_lock);
2584
2585 if (write_buffer == NULL) {
2586 write_buffer = zalloc_permanent(WRITE_BUFFER_SIZE, ZALIGN_64);
2587 }
2588
2589 //
2590 // this loop copies in and processes the events written.
2591 // it takes care to copy in reasonable size chunks and
2592 // process them. if there is an event that spans a chunk
2593 // boundary we're careful to copy those bytes down to the
2594 // beginning of the buffer and read the next chunk in just
2595 // after it.
2596 //
2597 while (uio_resid(uio)) {
2598 count = MIN(WRITE_BUFFER_SIZE - offset, (size_t)uio_resid(uio));
2599
2600 error = uiomove(write_buffer + offset, (int)count, uio);
2601 if (error) {
2602 break;
2603 }
2604
2605 error = parse_buffer_and_add_events(write_buffer, offset + count, ctx, &remainder);
2606 if (error) {
2607 break;
2608 }
2609
2610 //
2611 // if there's any remainder, copy it down to the beginning
2612 // of the buffer so that it will get processed the next time
2613 // through the loop. note that the remainder always starts
2614 // at an event boundary.
2615 //
2616 memmove(write_buffer, (write_buffer + count + offset) - remainder, remainder);
2617 offset = remainder;
2618 }
2619
2620 lck_mtx_unlock(&event_writer_lock);
2621
2622 return error;
2623 }
2624
2625
2626 static const struct fileops fsevents_fops = {
2627 .fo_type = DTYPE_FSEVENTS,
2628 .fo_read = fseventsf_read,
2629 .fo_write = fo_no_write,
2630 .fo_ioctl = fseventsf_ioctl,
2631 .fo_select = fseventsf_select,
2632 .fo_close = fseventsf_close,
2633 .fo_kqfilter = fseventsf_kqfilter,
2634 .fo_drain = fseventsf_drain,
2635 };
2636
/*
 * Argument block of the clone ioctl as passed by a 32-bit process.
 * fseventsioctl() widens it into an fsevent_clone_args64 before use.
 */
typedef struct fsevent_clone_args32 {
	user32_addr_t       event_list;         // user address of num_events int8_t entries
	int32_t             num_events;         // number of entries in event_list
	int32_t             event_queue_depth;  // handed to add_watcher() unchanged
	user32_addr_t       fd;                 // user address that receives the new descriptor (int32_t)
} fsevent_clone_args32;

/*
 * Argument block of the clone ioctl as passed by a 64-bit process.
 * Same fields as the 32-bit variant, with 64-bit user addresses.
 */
typedef struct fsevent_clone_args64 {
	user64_addr_t       event_list;         // user address of num_events int8_t entries
	int32_t             num_events;         // number of entries in event_list
	int32_t             event_queue_depth;  // handed to add_watcher() unchanged
	user64_addr_t       fd;                 // user address that receives the new descriptor (int32_t)
} fsevent_clone_args64;

// one ioctl per argument layout; fseventsioctl() rejects a command whose
// layout does not match the bitness of the calling process
#define FSEVENTS_CLONE_32  _IOW('s', 1, fsevent_clone_args32)
#define FSEVENTS_CLONE_64  _IOW('s', 1, fsevent_clone_args64)
2653
2654 static int
fseventsioctl(__unused dev_t dev,u_long cmd,caddr_t data,__unused int flag,struct proc * p)2655 fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p)
2656 {
2657 struct fileproc *f;
2658 int fd, error;
2659 fsevent_handle *fseh = NULL;
2660 fsevent_clone_args64 *fse_clone_args, _fse_clone;
2661 int8_t *event_list;
2662 int is64bit = proc_is64bit(p);
2663
2664 switch (cmd) {
2665 case FSEVENTS_CLONE_32: {
2666 if (is64bit) {
2667 return EINVAL;
2668 }
2669 fsevent_clone_args32 *args32 = (fsevent_clone_args32 *)data;
2670
2671 fse_clone_args = &_fse_clone;
2672 memset(fse_clone_args, 0, sizeof(fsevent_clone_args64));
2673
2674 fse_clone_args->event_list = CAST_USER_ADDR_T(args32->event_list);
2675 fse_clone_args->num_events = args32->num_events;
2676 fse_clone_args->event_queue_depth = args32->event_queue_depth;
2677 fse_clone_args->fd = CAST_USER_ADDR_T(args32->fd);
2678 goto handle_clone;
2679 }
2680
2681 case FSEVENTS_CLONE_64:
2682 if (!is64bit) {
2683 return EINVAL;
2684 }
2685 fse_clone_args = (fsevent_clone_args64 *)data;
2686
2687 handle_clone:
2688 if (fse_clone_args->num_events <= 0 || fse_clone_args->num_events > 4096) {
2689 return EINVAL;
2690 }
2691
2692 fseh = kalloc_type(fsevent_handle, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2693
2694 klist_init(&fseh->knotes);
2695
2696 event_list = kalloc_data(fse_clone_args->num_events * sizeof(int8_t), Z_WAITOK);
2697 if (event_list == NULL) {
2698 kfree_type(fsevent_handle, fseh);
2699 return ENOMEM;
2700 }
2701
2702 error = copyin((user_addr_t)fse_clone_args->event_list,
2703 (void *)event_list,
2704 fse_clone_args->num_events * sizeof(int8_t));
2705 if (error) {
2706 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2707 kfree_type(fsevent_handle, fseh);
2708 return error;
2709 }
2710
2711 /*
2712 * Lock down the user's "fd" result buffer so it's safe
2713 * to hold locks while we copy it out.
2714 */
2715 error = vslock((user_addr_t)fse_clone_args->fd,
2716 sizeof(int32_t));
2717 if (error) {
2718 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2719 kfree_type(fsevent_handle, fseh);
2720 return error;
2721 }
2722
2723 error = add_watcher(event_list,
2724 fse_clone_args->num_events,
2725 fse_clone_args->event_queue_depth,
2726 &fseh->watcher,
2727 fseh);
2728 if (error) {
2729 vsunlock((user_addr_t)fse_clone_args->fd,
2730 sizeof(int32_t), 0);
2731 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2732 kfree_type(fsevent_handle, fseh);
2733 return error;
2734 }
2735
2736 fseh->watcher->fseh = fseh;
2737
2738 error = falloc(p, &f, &fd, vfs_context_current());
2739 if (error) {
2740 remove_watcher(fseh->watcher);
2741 vsunlock((user_addr_t)fse_clone_args->fd,
2742 sizeof(int32_t), 0);
2743 kfree_data(event_list, fse_clone_args->num_events * sizeof(int8_t));
2744 kfree_type(fsevent_handle, fseh);
2745 return error;
2746 }
2747 proc_fdlock(p);
2748 f->fp_glob->fg_flag = FREAD | FWRITE;
2749 f->fp_glob->fg_ops = &fsevents_fops;
2750 fp_set_data(f, fseh);
2751
2752 /*
2753 * We can safely hold the proc_fdlock across this copyout()
2754 * because of the vslock() call above. The vslock() call
2755 * also ensures that we will never get an error, so assert
2756 * this.
2757 */
2758 error = copyout((void *)&fd, (user_addr_t)fse_clone_args->fd, sizeof(int32_t));
2759 assert(error == 0);
2760
2761 procfdtbl_releasefd(p, fd, NULL);
2762 fp_drop(p, fd, f, 1);
2763 proc_fdunlock(p);
2764
2765 vsunlock((user_addr_t)fse_clone_args->fd,
2766 sizeof(int32_t), 1);
2767 break;
2768
2769 default:
2770 error = EINVAL;
2771 break;
2772 }
2773
2774 return error;
2775 }
2776
2777 static void
fsevents_wakeup(fs_event_watcher * watcher)2778 fsevents_wakeup(fs_event_watcher *watcher)
2779 {
2780 selwakeup(&watcher->fseh->si);
2781 KNOTE(&watcher->fseh->knotes, NOTE_WRITE | NOTE_NONE);
2782 wakeup((caddr_t)watcher);
2783 }
2784
2785
2786 /*
2787 * A struct describing which functions will get invoked for certain
2788 * actions.
2789 */
2790 static const struct cdevsw fsevents_cdevsw =
2791 {
2792 .d_open = fseventsopen,
2793 .d_close = fseventsclose,
2794 .d_read = fseventsread,
2795 .d_write = fseventswrite,
2796 .d_ioctl = fseventsioctl,
2797 .d_stop = eno_stop,
2798 .d_reset = eno_reset,
2799 .d_select = eno_select,
2800 .d_mmap = eno_mmap,
2801 .d_strategy = eno_strat,
2802 .d_reserved_1 = eno_getc,
2803 .d_reserved_2 = eno_putc,
2804 };
2805
2806
2807 /*
2808 * Called to initialize our device,
2809 * and to register ourselves with devfs
2810 */
2811
2812 void
fsevents_init(void)2813 fsevents_init(void)
2814 {
2815 int ret;
2816
2817 if (fsevents_installed) {
2818 return;
2819 }
2820
2821 fsevents_installed = 1;
2822
2823 ret = cdevsw_add(-1, &fsevents_cdevsw);
2824 if (ret < 0) {
2825 fsevents_installed = 0;
2826 return;
2827 }
2828
2829 devfs_make_node(makedev(ret, 0), DEVFS_CHAR,
2830 UID_ROOT, GID_WHEEL, 0644, "fsevents");
2831
2832 fsevents_internal_init();
2833 }
2834
2835
2836 char *
get_pathbuff(void)2837 get_pathbuff(void)
2838 {
2839 return zalloc(ZV_NAMEI);
2840 }
2841
2842 void
release_pathbuff(char * path)2843 release_pathbuff(char *path)
2844 {
2845 if (path == NULL) {
2846 return;
2847 }
2848 zfree(ZV_NAMEI, path);
2849 }
2850
2851 int
get_fse_info(struct vnode * vp,fse_info * fse,__unused vfs_context_t ctx)2852 get_fse_info(struct vnode *vp, fse_info *fse, __unused vfs_context_t ctx)
2853 {
2854 struct vnode_attr va;
2855
2856 VATTR_INIT(&va);
2857 VATTR_WANTED(&va, va_fsid);
2858 va.va_vaflags |= VA_REALFSID;
2859 VATTR_WANTED(&va, va_fileid);
2860 VATTR_WANTED(&va, va_mode);
2861 VATTR_WANTED(&va, va_uid);
2862 VATTR_WANTED(&va, va_document_id);
2863 if (vp->v_flag & VISHARDLINK) {
2864 if (vp->v_type == VDIR) {
2865 VATTR_WANTED(&va, va_dirlinkcount);
2866 } else {
2867 VATTR_WANTED(&va, va_nlink);
2868 }
2869 }
2870
2871 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
2872 memset(fse, 0, sizeof(fse_info));
2873 return -1;
2874 }
2875
2876 return vnode_get_fse_info_from_vap(vp, fse, &va);
2877 }
2878
2879 int
vnode_get_fse_info_from_vap(vnode_t vp,fse_info * fse,struct vnode_attr * vap)2880 vnode_get_fse_info_from_vap(vnode_t vp, fse_info *fse, struct vnode_attr *vap)
2881 {
2882 fse->ino = (ino64_t)vap->va_fileid;
2883 fse->dev = (dev_t)vap->va_fsid;
2884 fse->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | vap->va_mode;
2885 fse->uid = (uid_t)vap->va_uid;
2886 fse->document_id = (uint32_t)vap->va_document_id;
2887 if (vp->v_flag & VISHARDLINK) {
2888 fse->mode |= FSE_MODE_HLINK;
2889 if (vp->v_type == VDIR) {
2890 fse->nlink = (uint64_t)vap->va_dirlinkcount;
2891 } else {
2892 fse->nlink = (uint64_t)vap->va_nlink;
2893 }
2894 }
2895
2896 return 0;
2897 }
2898
2899 void
create_fsevent_from_kevent(vnode_t vp,uint32_t kevents,struct vnode_attr * vap)2900 create_fsevent_from_kevent(vnode_t vp, uint32_t kevents, struct vnode_attr *vap)
2901 {
2902 int fsevent_type = FSE_CONTENT_MODIFIED, len; // the default is the most pessimistic
2903 char pathbuf[MAXPATHLEN];
2904 fse_info fse;
2905
2906
2907 if (kevents & VNODE_EVENT_DELETE) {
2908 fsevent_type = FSE_DELETE;
2909 } else if (kevents & (VNODE_EVENT_EXTEND | VNODE_EVENT_WRITE)) {
2910 fsevent_type = FSE_CONTENT_MODIFIED;
2911 } else if (kevents & VNODE_EVENT_LINK) {
2912 fsevent_type = FSE_CREATE_FILE;
2913 } else if (kevents & VNODE_EVENT_RENAME) {
2914 fsevent_type = FSE_CREATE_FILE; // XXXdbg - should use FSE_RENAME but we don't have the destination info;
2915 } else if (kevents & (VNODE_EVENT_FILE_CREATED | VNODE_EVENT_FILE_REMOVED | VNODE_EVENT_DIR_CREATED | VNODE_EVENT_DIR_REMOVED)) {
2916 fsevent_type = FSE_STAT_CHANGED; // XXXdbg - because vp is a dir and the thing created/removed lived inside it
2917 } else { // a catch all for VNODE_EVENT_PERMS, VNODE_EVENT_ATTRIB and anything else
2918 fsevent_type = FSE_STAT_CHANGED;
2919 }
2920
2921 // printf("convert_kevent: kevents 0x%x fsevent type 0x%x (for %s)\n", kevents, fsevent_type, vp->v_name ? vp->v_name : "(no-name)");
2922
2923 fse.dev = vap->va_fsid;
2924 fse.ino = vap->va_fileid;
2925 fse.mode = vnode_vttoif(vnode_vtype(vp)) | (uint32_t)vap->va_mode;
2926 if (vp->v_flag & VISHARDLINK) {
2927 fse.mode |= FSE_MODE_HLINK;
2928 if (vp->v_type == VDIR) {
2929 fse.nlink = vap->va_dirlinkcount;
2930 } else {
2931 fse.nlink = vap->va_nlink;
2932 }
2933 }
2934
2935 if (vp->v_type == VDIR) {
2936 fse.mode |= FSE_REMOTE_DIR_EVENT;
2937 }
2938
2939
2940 fse.uid = vap->va_uid;
2941 fse.document_id = vap->va_document_id;
2942
2943 len = sizeof(pathbuf);
2944 if (vn_getpath_no_firmlink(vp, pathbuf, &len) == 0) {
2945 add_fsevent(fsevent_type, vfs_context_current(), FSE_ARG_STRING, len, pathbuf, FSE_ARG_FINFO, &fse, FSE_ARG_DONE);
2946 }
2947 return;
2948 }
2949
2950 #else /* CONFIG_FSE */
2951
2952 #include <sys/fsevents.h>
2953
2954 /*
2955 * The get_pathbuff and release_pathbuff routines are used in places not
2956 * related to fsevents, and it's a handy abstraction, so define trivial
2957 * versions that don't cache a pool of buffers. This way, we don't have
2958 * to conditionalize the callers, and they still get the advantage of the
2959 * pool of buffers if CONFIG_FSE is turned on.
2960 */
2961 char *
get_pathbuff(void)2962 get_pathbuff(void)
2963 {
2964 return zalloc(ZV_NAMEI);
2965 }
2966
2967 void
release_pathbuff(char * path)2968 release_pathbuff(char *path)
2969 {
2970 zfree(ZV_NAMEI, path);
2971 }
2972
2973 int
add_fsevent(__unused int type,__unused vfs_context_t ctx,...)2974 add_fsevent(__unused int type, __unused vfs_context_t ctx, ...)
2975 {
2976 return 0;
2977 }
2978
2979 int
need_fsevent(__unused int type,__unused vnode_t vp)2980 need_fsevent(__unused int type, __unused vnode_t vp)
2981 {
2982 return 0;
2983 }
2984
2985 #endif /* CONFIG_FSE */
2986