xref: /xnu-8019.80.24/bsd/kern/decmpfs.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2008-2018 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #if !FS_COMPRESSION
29 
30 /* We need these symbols even though compression is turned off */
31 
/*
 * When compression is compiled out, emit each public decmpfs symbol as an
 * absolute symbol with value 0 so clients of these symbols still link.
 */
#define UNUSED_SYMBOL(x)        asm(".global _" #x "\n.set _" #x ", 0\n");

UNUSED_SYMBOL(register_decmpfs_decompressor)
UNUSED_SYMBOL(unregister_decmpfs_decompressor)
UNUSED_SYMBOL(decmpfs_init)
UNUSED_SYMBOL(decmpfs_read_compressed)
UNUSED_SYMBOL(decmpfs_cnode_cmp_type)
UNUSED_SYMBOL(decmpfs_cnode_get_vnode_state)
UNUSED_SYMBOL(decmpfs_cnode_get_vnode_cached_size)
UNUSED_SYMBOL(decmpfs_cnode_get_vnode_cached_nchildren)
UNUSED_SYMBOL(decmpfs_cnode_get_vnode_cached_total_size)
UNUSED_SYMBOL(decmpfs_lock_compressed_data)
UNUSED_SYMBOL(decmpfs_cnode_free)
UNUSED_SYMBOL(decmpfs_cnode_alloc)
UNUSED_SYMBOL(decmpfs_cnode_destroy)
UNUSED_SYMBOL(decmpfs_decompress_file)
UNUSED_SYMBOL(decmpfs_unlock_compressed_data)
UNUSED_SYMBOL(decmpfs_cnode_init)
UNUSED_SYMBOL(decmpfs_cnode_set_vnode_state)
UNUSED_SYMBOL(decmpfs_hides_xattr)
UNUSED_SYMBOL(decmpfs_ctx)
UNUSED_SYMBOL(decmpfs_file_is_compressed)
UNUSED_SYMBOL(decmpfs_update_attributes)
UNUSED_SYMBOL(decmpfs_hides_rsrc)
UNUSED_SYMBOL(decmpfs_pagein_compressed)
UNUSED_SYMBOL(decmpfs_validate_compressed_file)
58 
59 #else /* FS_COMPRESSION */
60 #include <sys/kernel.h>
61 #include <sys/vnode_internal.h>
62 #include <sys/file_internal.h>
63 #include <sys/stat.h>
64 #include <sys/fcntl.h>
65 #include <sys/xattr.h>
66 #include <sys/namei.h>
67 #include <sys/user.h>
68 #include <sys/mount_internal.h>
69 #include <sys/ubc.h>
70 #include <sys/decmpfs.h>
71 #include <sys/uio_internal.h>
72 #include <libkern/OSByteOrder.h>
73 #include <libkern/section_keywords.h>
74 #include <sys/fsctl.h>
75 
76 #include <ptrauth.h>
77 
78 #pragma mark --- debugging ---
79 
80 #define COMPRESSION_DEBUG 0
81 #define COMPRESSION_DEBUG_VERBOSE 0
82 #define MALLOC_DEBUG 0
83 
84 #if COMPRESSION_DEBUG
85 static char*
86 vnpath(vnode_t vp, char *path, int len)
87 {
88 	int origlen = len;
89 	path[0] = 0;
90 	vn_getpath(vp, path, &len);
91 	path[origlen - 1] = 0;
92 	return path;
93 }
94 #endif
95 
#define ErrorLog(x, args...) \
	printf("%s:%d:%s: " x, __FILE_NAME__, __LINE__, __FUNCTION__, ## args)
#if COMPRESSION_DEBUG
#define ErrorLogWithPath(x, args...) do { \
	char *path = zalloc(ZV_NAMEI); \
	printf("%s:%d:%s: %s: " x, __FILE_NAME__, __LINE__, __FUNCTION__, \
	    vnpath(vp, path, PATH_MAX), ## args); \
	zfree(ZV_NAMEI, path); \
} while(0)
#else
/*
 * Non-debug builds redact the path but still require a `vp` in scope.
 * Use a cast to void -- the previous "(void*)vp;" was a statement with no
 * effect and does not reliably suppress unused-value diagnostics.
 */
#define ErrorLogWithPath(x, args...) do { \
	(void)vp; \
	printf("%s:%d:%s: %s: " x, __FILE_NAME__, __LINE__, __FUNCTION__, \
	    "<private>", ## args); \
} while(0)
#endif

#if COMPRESSION_DEBUG
#define DebugLog ErrorLog
#define DebugLogWithPath ErrorLogWithPath
#else
#define DebugLog(x...) do { } while(0)
#define DebugLogWithPath(x...) do { } while(0)
#endif

#if COMPRESSION_DEBUG_VERBOSE
#define VerboseLog ErrorLog
#define VerboseLogWithPath ErrorLogWithPath
#else
#define VerboseLog(x...) do { } while(0)
#define VerboseLogWithPath(x...) do { } while(0)
#endif
128 
129 #pragma mark --- globals ---
130 
static LCK_GRP_DECLARE(decmpfs_lockgrp, "VFSCOMP");
/* guards the decompressors[] registration table */
static LCK_RW_DECLARE(decompressorsLock, &decmpfs_lockgrp);
/* serializes sleep/wakeup on decompress_channel */
static LCK_MTX_DECLARE(decompress_channel_mtx, &decmpfs_lockgrp);

static const decmpfs_registration *decompressors[CMP_MAX]; /* the registered compressors */
static int decompress_channel; /* channel used by decompress_file to wake up waiters */

/* decmpfs' own vfs context; I/O issued with it is recognized elsewhere
 * (e.g. decmpfs_hides_rsrc compares against it) */
vfs_context_t decmpfs_ctx;

#pragma mark --- decmp_get_func ---

/* byte offset of a callback member within a decmpfs_registration */
#define offsetof_func(func) ((uintptr_t)offsetof(decmpfs_registration, func))
143 
144 static void *
145 _func_from_offset(uint32_t type, uintptr_t offset, uint32_t discriminator)
146 {
147 	/* get the function at the given offset in the registration for the given type */
148 	const decmpfs_registration *reg = decompressors[type];
149 
150 	switch (reg->decmpfs_registration) {
151 	case DECMPFS_REGISTRATION_VERSION_V1:
152 		if (offset > offsetof_func(free_data)) {
153 			return NULL;
154 		}
155 		break;
156 	case DECMPFS_REGISTRATION_VERSION_V3:
157 		if (offset > offsetof_func(get_flags)) {
158 			return NULL;
159 		}
160 		break;
161 	default:
162 		return NULL;
163 	}
164 
165 	void *ptr = *(void * const *)((uintptr_t)reg + offset);
166 	if (ptr != NULL) {
167 		/* Resign as a function-in-void* */
168 		ptr = ptrauth_auth_and_resign(ptr, ptrauth_key_asia, discriminator, ptrauth_key_asia, 0);
169 	}
170 	return ptr;
171 }
172 
173 extern void IOServicePublishResource( const char * property, boolean_t value );
174 extern boolean_t IOServiceWaitForMatchingResource( const char * property, uint64_t timeout );
175 extern boolean_t IOCatalogueMatchingDriversPresent( const char * property );
176 
static void *
_decmp_get_func(vnode_t vp, uint32_t type, uintptr_t offset, uint32_t discriminator)
{
	/*
	 *  this function should be called while holding a shared lock to decompressorsLock,
	 *  and will return with the lock held
	 *
	 *  Returns the registered callback at `offset` for compression `type`,
	 *  or NULL if the type is invalid or never registers.  Note: it may
	 *  temporarily drop and re-take the shared lock while waiting for a
	 *  kext that has promised to provide the type.
	 */

	if (type >= CMP_MAX) {
		return NULL;
	}

	if (decompressors[type] != NULL) {
		// the compressor has already registered but the function might be null
		return _func_from_offset(type, offset, discriminator);
	}

	// does IOKit know about a kext that is supposed to provide this type?
	char providesName[80];
	snprintf(providesName, sizeof(providesName), "com.apple.AppleFSCompression.providesType%u", type);
	if (IOCatalogueMatchingDriversPresent(providesName)) {
		// there is a kext that says it will register for this type, so let's wait for it
		char resourceName[80];
		uint64_t delay = 10000000ULL; // 10 milliseconds.
		snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", type);
		ErrorLogWithPath("waiting for %s\n", resourceName);
		while (decompressors[type] == NULL) {
			lck_rw_unlock_shared(&decompressorsLock); // we have to unlock to allow the kext to register
			if (IOServiceWaitForMatchingResource(resourceName, delay)) {
				lck_rw_lock_shared(&decompressorsLock);
				break;
			}
			if (!IOCatalogueMatchingDriversPresent(providesName)) {
				// the kext that promised this type has disappeared, so stop waiting
				ErrorLogWithPath("the kext with %s is no longer present\n", providesName);
				lck_rw_lock_shared(&decompressorsLock);
				break;
			}
			ErrorLogWithPath("still waiting for %s\n", resourceName);
			delay *= 2; // back off between polls
			lck_rw_lock_shared(&decompressorsLock);
		}
		// IOKit says the kext is loaded, so it should be registered too!
		if (decompressors[type] == NULL) {
			ErrorLogWithPath("we found %s, but the type still isn't registered\n", providesName);
			return NULL;
		}
		// it's now registered, so let's return the function
		return _func_from_offset(type, offset, discriminator);
	}

	// the compressor hasn't registered, so it never will unless someone manually kextloads it
	ErrorLogWithPath("tried to access a compressed file of unregistered type %d\n", type);
	return NULL;
}

/* typed accessor: look up callback `func` for `type`, cast back to its declared pointer type */
#define decmp_get_func(vp, type, func) (typeof(decompressors[0]->func))_decmp_get_func(vp, type, offsetof_func(func), ptrauth_function_pointer_type_discriminator(typeof(decompressors[0]->func)))
234 
235 #pragma mark --- utilities ---
236 
237 #if COMPRESSION_DEBUG
238 static int
239 vnsize(vnode_t vp, uint64_t *size)
240 {
241 	struct vnode_attr va;
242 	VATTR_INIT(&va);
243 	VATTR_WANTED(&va, va_data_size);
244 	int error = vnode_getattr(vp, &va, decmpfs_ctx);
245 	if (error != 0) {
246 		ErrorLogWithPath("vnode_getattr err %d\n", error);
247 		return error;
248 	}
249 	*size = va.va_data_size;
250 	return 0;
251 }
252 #endif /* COMPRESSION_DEBUG */
253 
254 #pragma mark --- cnode routines ---
255 
256 ZONE_DECLARE(decmpfs_cnode_zone, "decmpfs_cnode",
257     sizeof(struct decmpfs_cnode), ZC_NONE);
258 
/* Allocate a decmpfs cnode from its dedicated zone (blocks until memory is available). */
decmpfs_cnode *
decmpfs_cnode_alloc(void)
{
	return zalloc(decmpfs_cnode_zone);
}
264 
/* Return a decmpfs cnode to its zone; caller must have called decmpfs_cnode_destroy first. */
void
decmpfs_cnode_free(decmpfs_cnode *dp)
{
	zfree(decmpfs_cnode_zone, dp);
}
270 
/* Zero the cnode and initialize its compressed-data rw lock (memset must precede lck_rw_init). */
void
decmpfs_cnode_init(decmpfs_cnode *cp)
{
	memset(cp, 0, sizeof(*cp));
	lck_rw_init(&cp->compressed_data_lock, &decmpfs_lockgrp, NULL);
}
277 
/* Tear down the cnode's rw lock; pairs with decmpfs_cnode_init. */
void
decmpfs_cnode_destroy(decmpfs_cnode *cp)
{
	lck_rw_destroy(&cp->compressed_data_lock, &decmpfs_lockgrp);
}
283 
284 bool
285 decmpfs_trylock_compressed_data(decmpfs_cnode *cp, int exclusive)
286 {
287 	void *thread = current_thread();
288 	bool retval = false;
289 
290 	if (cp->lockowner == thread) {
291 		/* this thread is already holding an exclusive lock, so bump the count */
292 		cp->lockcount++;
293 		retval = true;
294 	} else if (exclusive) {
295 		if ((retval = lck_rw_try_lock_exclusive(&cp->compressed_data_lock))) {
296 			cp->lockowner = thread;
297 			cp->lockcount = 1;
298 		}
299 	} else {
300 		if ((retval = lck_rw_try_lock_shared(&cp->compressed_data_lock))) {
301 			cp->lockowner = (void *)-1;
302 		}
303 	}
304 	return retval;
305 }
306 
307 void
308 decmpfs_lock_compressed_data(decmpfs_cnode *cp, int exclusive)
309 {
310 	void *thread = current_thread();
311 
312 	if (cp->lockowner == thread) {
313 		/* this thread is already holding an exclusive lock, so bump the count */
314 		cp->lockcount++;
315 	} else if (exclusive) {
316 		lck_rw_lock_exclusive(&cp->compressed_data_lock);
317 		cp->lockowner = thread;
318 		cp->lockcount = 1;
319 	} else {
320 		lck_rw_lock_shared(&cp->compressed_data_lock);
321 		cp->lockowner = (void *)-1;
322 	}
323 }
324 
325 void
326 decmpfs_unlock_compressed_data(decmpfs_cnode *cp, __unused int exclusive)
327 {
328 	void *thread = current_thread();
329 
330 	if (cp->lockowner == thread) {
331 		/* this thread is holding an exclusive lock, so decrement the count */
332 		if ((--cp->lockcount) > 0) {
333 			/* the caller still has outstanding locks, so we're done */
334 			return;
335 		}
336 		cp->lockowner = NULL;
337 	}
338 
339 	lck_rw_done(&cp->compressed_data_lock);
340 }
341 
/* Return the cached compression state (FILE_* constant) for this cnode. */
uint32_t
decmpfs_cnode_get_vnode_state(decmpfs_cnode *cp)
{
	return cp->cmp_state;
}
347 
348 void
349 decmpfs_cnode_set_vnode_state(decmpfs_cnode *cp, uint32_t state, int skiplock)
350 {
351 	if (!skiplock) {
352 		decmpfs_lock_compressed_data(cp, 1);
353 	}
354 	cp->cmp_state = (uint8_t)state;
355 	if (state == FILE_TYPE_UNKNOWN) {
356 		/* clear out the compression type too */
357 		cp->cmp_type = 0;
358 	}
359 	if (!skiplock) {
360 		decmpfs_unlock_compressed_data(cp, 1);
361 	}
362 }
363 
364 static void
365 decmpfs_cnode_set_vnode_cmp_type(decmpfs_cnode *cp, uint32_t cmp_type, int skiplock)
366 {
367 	if (!skiplock) {
368 		decmpfs_lock_compressed_data(cp, 1);
369 	}
370 	cp->cmp_type = cmp_type;
371 	if (!skiplock) {
372 		decmpfs_unlock_compressed_data(cp, 1);
373 	}
374 }
375 
376 static void
377 decmpfs_cnode_set_vnode_minimal_xattr(decmpfs_cnode *cp, int minimal_xattr, int skiplock)
378 {
379 	if (!skiplock) {
380 		decmpfs_lock_compressed_data(cp, 1);
381 	}
382 	cp->cmp_minimal_xattr = !!minimal_xattr;
383 	if (!skiplock) {
384 		decmpfs_unlock_compressed_data(cp, 1);
385 	}
386 }
387 
/* Return the cached uncompressed data size for this cnode. */
uint64_t
decmpfs_cnode_get_vnode_cached_size(decmpfs_cnode *cp)
{
	return cp->uncompressed_size;
}

/* Return the cached child count (used for dataless directories). */
uint64_t
decmpfs_cnode_get_vnode_cached_nchildren(decmpfs_cnode *cp)
{
	return cp->nchildren;
}

/* Return the cached total size (used for dataless packages). */
uint64_t
decmpfs_cnode_get_vnode_cached_total_size(decmpfs_cnode *cp)
{
	return cp->total_size;
}
405 
406 void
407 decmpfs_cnode_set_vnode_cached_size(decmpfs_cnode *cp, uint64_t size)
408 {
409 	while (1) {
410 		uint64_t old = cp->uncompressed_size;
411 		if (OSCompareAndSwap64(old, size, (UInt64*)&cp->uncompressed_size)) {
412 			return;
413 		} else {
414 			/* failed to write our value, so loop */
415 		}
416 	}
417 }
418 
419 void
420 decmpfs_cnode_set_vnode_cached_nchildren(decmpfs_cnode *cp, uint64_t nchildren)
421 {
422 	while (1) {
423 		uint64_t old = cp->nchildren;
424 		if (OSCompareAndSwap64(old, nchildren, (UInt64*)&cp->nchildren)) {
425 			return;
426 		} else {
427 			/* failed to write our value, so loop */
428 		}
429 	}
430 }
431 
432 void
433 decmpfs_cnode_set_vnode_cached_total_size(decmpfs_cnode *cp, uint64_t total_sz)
434 {
435 	while (1) {
436 		uint64_t old = cp->total_size;
437 		if (OSCompareAndSwap64(old, total_sz, (UInt64*)&cp->total_size)) {
438 			return;
439 		} else {
440 			/* failed to write our value, so loop */
441 		}
442 	}
443 }
444 
/* Return the cached decompressor-provided flags for this cnode. */
static uint64_t
decmpfs_cnode_get_decompression_flags(decmpfs_cnode *cp)
{
	return cp->decompression_flags;
}
450 
451 static void
452 decmpfs_cnode_set_decompression_flags(decmpfs_cnode *cp, uint64_t flags)
453 {
454 	while (1) {
455 		uint64_t old = cp->decompression_flags;
456 		if (OSCompareAndSwap64(old, flags, (UInt64*)&cp->decompression_flags)) {
457 			return;
458 		} else {
459 			/* failed to write our value, so loop */
460 		}
461 	}
462 }
463 
/* Return the cached compression type for this cnode (0 if unknown/cleared). */
uint32_t
decmpfs_cnode_cmp_type(decmpfs_cnode *cp)
{
	return cp->cmp_type;
}
469 
470 #pragma mark --- decmpfs state routines ---
471 
static int
decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header **hdrOut, int returnInvalid, size_t *hdr_size)
{
	/*
	 *  fetches vp's compression xattr, converting it into a decmpfs_header; returns 0 or errno
	 *  if returnInvalid == 1, returns the header even if the type was invalid (out of range),
	 *  and return ERANGE in that case
	 *
	 *  On success (and in the ERANGE case above) the caller owns the buffer
	 *  and must release it with kfree_data(*hdrOut, *hdr_size).
	 */

	size_t read_size             = 0;
	size_t attr_size             = 0;
	size_t alloc_size            = 0;
	uio_t attr_uio               = NULL;
	int err                      = 0;
	char *data                   = NULL;
	/* the cnode caches enough state to rebuild a minimal header without disk I/O */
	const bool no_additional_data = ((cp != NULL)
	    && (cp->cmp_type != 0)
	    && (cp->cmp_minimal_xattr != 0));
	uio_stackbuf_t uio_buf[UIO_SIZEOF(1)];
	decmpfs_header *hdr = NULL;

	/*
	 * Trace the following parameters on entry with event-id 0x03120004
	 *
	 * @vp->v_id:       vnode-id for which to fetch compressed header.
	 * @no_additional_data: If set true then xattr didn't have any extra data.
	 * @returnInvalid:  return the header even though the type is out of range.
	 */
	DECMPFS_EMIT_TRACE_ENTRY(DECMPDBG_FETCH_COMPRESSED_HEADER, vp->v_id,
	    no_additional_data, returnInvalid);

	if (no_additional_data) {
		/* this file's xattr didn't have any extra data when we fetched it, so we can synthesize a header from the data in the cnode */

		alloc_size = sizeof(decmpfs_header);
		data = kalloc_data(sizeof(decmpfs_header), Z_WAITOK);
		if (!data) {
			err = ENOMEM;
			goto out;
		}
		hdr = (decmpfs_header*)data;
		hdr->attr_size = sizeof(decmpfs_disk_header);
		hdr->compression_magic = DECMPFS_MAGIC;
		hdr->compression_type  = cp->cmp_type;
		if (hdr->compression_type == DATALESS_PKG_CMPFS_TYPE) {
			/* dataless packages are directory-only; size and nchildren are packed into one value */
			if (!vnode_isdir(vp)) {
				err = EINVAL;
				goto out;
			}
			hdr->_size.value = DECMPFS_PKG_VALUE_FROM_SIZE_COUNT(
				decmpfs_cnode_get_vnode_cached_size(cp),
				decmpfs_cnode_get_vnode_cached_nchildren(cp));
		} else if (vnode_isdir(vp)) {
			hdr->_size.value = decmpfs_cnode_get_vnode_cached_nchildren(cp);
		} else {
			hdr->_size.value = decmpfs_cnode_get_vnode_cached_size(cp);
		}
	} else {
		/* figure out how big the xattr is on disk */
		err = vn_getxattr(vp, DECMPFS_XATTR_NAME, NULL, &attr_size, XATTR_NOSECURITY, decmpfs_ctx);
		if (err != 0) {
			goto out;
		}
		/* in-memory header layout prepends an attr_size field to the on-disk bytes */
		alloc_size = attr_size + sizeof(hdr->attr_size);

		if (attr_size < sizeof(decmpfs_disk_header) || attr_size > MAX_DECMPFS_XATTR_SIZE) {
			err = EINVAL;
			goto out;
		}

		/* allocation includes space for the extra attr_size field of a compressed_header */
		data = (char *)kalloc_data(alloc_size, Z_WAITOK);
		if (!data) {
			err = ENOMEM;
			goto out;
		}

		/* read the xattr into our buffer, skipping over the attr_size field at the beginning */
		attr_uio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
		uio_addiov(attr_uio, CAST_USER_ADDR_T(data + sizeof(hdr->attr_size)), attr_size);

		err = vn_getxattr(vp, DECMPFS_XATTR_NAME, attr_uio, &read_size, XATTR_NOSECURITY, decmpfs_ctx);
		if (err != 0) {
			goto out;
		}
		if (read_size != attr_size) {
			/* short read -- the xattr apparently changed between the probe and the read */
			err = EINVAL;
			goto out;
		}
		hdr = (decmpfs_header*)data;
		hdr->attr_size = (uint32_t)attr_size;
		/* swap the fields to native endian */
		hdr->compression_magic = OSSwapLittleToHostInt32(hdr->compression_magic);
		hdr->compression_type  = OSSwapLittleToHostInt32(hdr->compression_type);
		hdr->uncompressed_size = OSSwapLittleToHostInt64(hdr->uncompressed_size);
	}

	if (hdr->compression_magic != DECMPFS_MAGIC) {
		ErrorLogWithPath("invalid compression_magic 0x%08x, should be 0x%08x\n", hdr->compression_magic, DECMPFS_MAGIC);
		err = EINVAL;
		goto out;
	}

	/*
	 * Special-case the DATALESS compressor here; that is a valid type,
	 * even though there will never be an entry in the decompressor
	 * handler table for it.  If we don't do this, then the cmp_state
	 * for this cnode will end up being marked NOT_COMPRESSED, and
	 * we'll be stuck in limbo.
	 */
	if (hdr->compression_type >= CMP_MAX && !decmpfs_type_is_dataless(hdr->compression_type)) {
		if (returnInvalid) {
			/* return the header even though the type is out of range */
			err = ERANGE;
		} else {
			ErrorLogWithPath("compression_type %d out of range\n", hdr->compression_type);
			err = EINVAL;
		}
		goto out;
	}

out:
	/* hard failures free the buffer; ERANGE deliberately hands it to the caller */
	if (err && (err != ERANGE)) {
		DebugLogWithPath("err %d\n", err);
		kfree_data(data, alloc_size);
		*hdrOut = NULL;
	} else {
		*hdrOut = hdr;
		*hdr_size = alloc_size;
	}
	/*
	 * Trace the following parameters on return with event-id 0x03120004.
	 *
	 * @vp->v_id:       vnode-id for which to fetch compressed header.
	 * @err:            value returned from this function.
	 */
	DECMPFS_EMIT_TRACE_RETURN(DECMPDBG_FETCH_COMPRESSED_HEADER, vp->v_id, err);
	return err;
}
611 
612 static int
613 decmpfs_fast_get_state(decmpfs_cnode *cp)
614 {
615 	/*
616 	 *  return the cached state
617 	 *  this should *only* be called when we know that decmpfs_file_is_compressed has already been called,
618 	 *  because this implies that the cached state is valid
619 	 */
620 	int cmp_state = decmpfs_cnode_get_vnode_state(cp);
621 
622 	switch (cmp_state) {
623 	case FILE_IS_NOT_COMPRESSED:
624 	case FILE_IS_COMPRESSED:
625 	case FILE_IS_CONVERTING:
626 		return cmp_state;
627 	case FILE_TYPE_UNKNOWN:
628 		/*
629 		 *  we should only get here if decmpfs_file_is_compressed was not called earlier on this vnode,
630 		 *  which should not be possible
631 		 */
632 		ErrorLog("decmpfs_fast_get_state called on unknown file\n");
633 		return FILE_IS_NOT_COMPRESSED;
634 	default:
635 		/* */
636 		ErrorLog("unknown cmp_state %d\n", cmp_state);
637 		return FILE_IS_NOT_COMPRESSED;
638 	}
639 }
640 
641 static int
642 decmpfs_fast_file_is_compressed(decmpfs_cnode *cp)
643 {
644 	int cmp_state = decmpfs_cnode_get_vnode_state(cp);
645 
646 	switch (cmp_state) {
647 	case FILE_IS_NOT_COMPRESSED:
648 		return 0;
649 	case FILE_IS_COMPRESSED:
650 	case FILE_IS_CONVERTING:
651 		return 1;
652 	case FILE_TYPE_UNKNOWN:
653 		/*
654 		 *  we should only get here if decmpfs_file_is_compressed was not called earlier on this vnode,
655 		 *  which should not be possible
656 		 */
657 		ErrorLog("decmpfs_fast_get_state called on unknown file\n");
658 		return 0;
659 	default:
660 		/* */
661 		ErrorLog("unknown cmp_state %d\n", cmp_state);
662 		return 0;
663 	}
664 }
665 
errno_t
decmpfs_validate_compressed_file(vnode_t vp, decmpfs_cnode *cp)
{
	/* give a compressor a chance to indicate that a compressed file is invalid */
	decmpfs_header *hdr = NULL;
	size_t alloc_size = 0;
	errno_t err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0, &alloc_size);

	if (err) {
		/* we couldn't get the header */
		if (decmpfs_fast_get_state(cp) == FILE_IS_NOT_COMPRESSED) {
			/* the file is no longer compressed, so return success */
			err = 0;
		}
		goto out;
	}

	/* dataless types have no decompressor registration, so nothing to validate */
	if (!decmpfs_type_is_dataless(hdr->compression_type)) {
		lck_rw_lock_shared(&decompressorsLock);
		decmpfs_validate_compressed_file_func validate = decmp_get_func(vp, hdr->compression_type, validate);
		if (validate) { /* make sure this validation function is valid */
			/* is the data okay? */
			err = validate(vp, decmpfs_ctx, hdr);
		} else if (decmp_get_func(vp, hdr->compression_type, fetch) == NULL) {
			/* the type isn't registered */
			err = EIO;
		} else {
			/* no validate registered, so nothing to do */
			err = 0;
		}
		lck_rw_unlock_shared(&decompressorsLock);
	}
out:
	if (hdr != NULL) {
		kfree_data(hdr, alloc_size);
	}
#if COMPRESSION_DEBUG
	if (err) {
		DebugLogWithPath("decmpfs_validate_compressed_file ret %d, vp->v_flag %d\n", err, vp->v_flag);
	}
#endif
	return err;
}
709 
int
decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp)
{
	/*
	 *  determines whether vp points to a compressed file
	 *
	 *  to speed up this operation, we cache the result in the cnode, and do as little as possible
	 *  in the case where the cnode already has a valid cached state
	 *
	 *  returns 1 if the file is compressed (or converting), 0 otherwise
	 */

	int ret = 0;
	int error = 0;
	uint32_t cmp_state;
	struct vnode_attr va_fetch;
	decmpfs_header *hdr = NULL;
	size_t alloc_size = 0;
	mount_t mp = NULL;
	int cnode_locked = 0;
	int saveInvalid = 0; // save the header data even though the type was out of range
	uint64_t decompression_flags = 0;
	bool is_mounted, is_local_fs;

	if (vnode_isnamedstream(vp)) {
		/*
		 *  named streams can't be compressed
		 *  since named streams of the same file share the same cnode,
		 *  we don't want to get/set the state in the cnode, just return 0
		 */
		return 0;
	}

	/* examine the cached state in this cnode */
	cmp_state = decmpfs_cnode_get_vnode_state(cp);
	switch (cmp_state) {
	case FILE_IS_NOT_COMPRESSED:
		return 0;
	case FILE_IS_COMPRESSED:
		return 1;
	case FILE_IS_CONVERTING:
		/* treat the file as compressed, because this gives us a way to block future reads until decompression is done */
		return 1;
	case FILE_TYPE_UNKNOWN:
		/* the first time we encountered this vnode, so we need to check it out */
		break;
	default:
		/* unknown state, assume file is not compressed */
		ErrorLogWithPath("unknown cmp_state %d\n", cmp_state);
		return 0;
	}

	is_mounted = false;
	is_local_fs = false;
	mp = vnode_mount(vp);
	if (mp) {
		is_mounted = true;
	}
	if (is_mounted) {
		is_local_fs = ((mp->mnt_flag & MNT_LOCAL));
	}
	/*
	 * Trace the following parameters on entry with event-id 0x03120014.
	 *
	 * @vp->v_id:       vnode-id of the file being queried.
	 * @is_mounted:     set to true if @vp belongs to a mounted fs.
	 * @is_local_fs:    set to true if @vp belongs to local fs.
	 */
	DECMPFS_EMIT_TRACE_ENTRY(DECMPDBG_FILE_IS_COMPRESSED, vp->v_id,
	    is_mounted, is_local_fs);

	if (!is_mounted) {
		/*
		 *  this should only be true before we mount the root filesystem
		 *  we short-cut this return to avoid the call to getattr below, which
		 *  will fail before root is mounted
		 */
		ret = FILE_IS_NOT_COMPRESSED;
		goto done;
	}

	if (!is_local_fs) {
		/* compression only supported on local filesystems */
		ret = FILE_IS_NOT_COMPRESSED;
		goto done;
	}

	/* lock our cnode data so that another caller doesn't change the state under us */
	decmpfs_lock_compressed_data(cp, 1);
	cnode_locked = 1;

	VATTR_INIT(&va_fetch);
	VATTR_WANTED(&va_fetch, va_flags);
	error = vnode_getattr(vp, &va_fetch, decmpfs_ctx);
	if (error) {
		/* failed to get the bsd flags so the file is not compressed */
		ret = FILE_IS_NOT_COMPRESSED;
		goto done;
	}
	if (va_fetch.va_flags & UF_COMPRESSED) {
		/* UF_COMPRESSED is on, make sure the file has the DECMPFS_XATTR_NAME xattr */
		error = decmpfs_fetch_compressed_header(vp, cp, &hdr, 1, &alloc_size);
		if ((hdr != NULL) && (error == ERANGE)) {
			/* out-of-range type: still cache the header-derived data below */
			saveInvalid = 1;
		}
		if (error) {
			/* failed to get the xattr so the file is not compressed */
			ret = FILE_IS_NOT_COMPRESSED;
			goto done;
		}
		/*
		 * We got the xattr, so the file is at least tagged compressed.
		 * For DATALESS, regular files and directories can be "compressed".
		 * For all other types, only files are allowed.
		 */
		if (!vnode_isreg(vp) &&
		    !(decmpfs_type_is_dataless(hdr->compression_type) && vnode_isdir(vp))) {
			ret = FILE_IS_NOT_COMPRESSED;
			goto done;
		}
		ret = FILE_IS_COMPRESSED;
		goto done;
	}
	/* UF_COMPRESSED isn't on, so the file isn't compressed */
	ret = FILE_IS_NOT_COMPRESSED;

done:
	if (((ret == FILE_IS_COMPRESSED) || saveInvalid) && hdr) {
		/*
		 *  cache the uncompressed size away in the cnode
		 */

		if (!cnode_locked) {
			/*
			 *  we should never get here since the only place ret is set to FILE_IS_COMPRESSED
			 *  is after the call to decmpfs_lock_compressed_data above
			 */
			decmpfs_lock_compressed_data(cp, 1);
			cnode_locked = 1;
		}

		if (vnode_isdir(vp)) {
			/* 64 is a nominal placeholder size for directories -- TODO confirm rationale */
			decmpfs_cnode_set_vnode_cached_size(cp, 64);
			decmpfs_cnode_set_vnode_cached_nchildren(cp, decmpfs_get_directory_entries(hdr));
			if (hdr->compression_type == DATALESS_PKG_CMPFS_TYPE) {
				decmpfs_cnode_set_vnode_cached_total_size(cp, DECMPFS_PKG_SIZE(hdr->_size));
			}
		} else {
			decmpfs_cnode_set_vnode_cached_size(cp, hdr->uncompressed_size);
		}
		decmpfs_cnode_set_vnode_state(cp, ret, 1);
		decmpfs_cnode_set_vnode_cmp_type(cp, hdr->compression_type, 1);
		/* remember if the xattr's size was equal to the minimal xattr */
		if (hdr->attr_size == sizeof(decmpfs_disk_header)) {
			decmpfs_cnode_set_vnode_minimal_xattr(cp, 1, 1);
		}
		if (ret == FILE_IS_COMPRESSED) {
			/* update the ubc's size for this file */
			ubc_setsize(vp, hdr->uncompressed_size);

			/* update the decompression flags in the decmpfs cnode */
			lck_rw_lock_shared(&decompressorsLock);
			decmpfs_get_decompression_flags_func get_flags = decmp_get_func(vp, hdr->compression_type, get_flags);
			if (get_flags) {
				decompression_flags = get_flags(vp, decmpfs_ctx, hdr);
			}
			lck_rw_unlock_shared(&decompressorsLock);
			decmpfs_cnode_set_decompression_flags(cp, decompression_flags);
		}
	} else {
		/* we might have already taken the lock above; if so, skip taking it again by passing cnode_locked as the skiplock parameter */
		decmpfs_cnode_set_vnode_state(cp, ret, cnode_locked);
	}

	if (cnode_locked) {
		decmpfs_unlock_compressed_data(cp, 1);
	}

	if (hdr != NULL) {
		kfree_data(hdr, alloc_size);
	}

	/*
	 * Trace the following parameters on return with event-id 0x03120014.
	 *
	 * @vp->v_id:       vnode-id of the file being queried.
	 * @return:         set to 1 is file is compressed.
	 */
	switch (ret) {
	case FILE_IS_NOT_COMPRESSED:
		DECMPFS_EMIT_TRACE_RETURN(DECMPDBG_FILE_IS_COMPRESSED, vp->v_id, 0);
		return 0;
	case FILE_IS_COMPRESSED:
	case FILE_IS_CONVERTING:
		DECMPFS_EMIT_TRACE_RETURN(DECMPDBG_FILE_IS_COMPRESSED, vp->v_id, 1);
		return 1;
	default:
		/* unknown state, assume file is not compressed */
		DECMPFS_EMIT_TRACE_RETURN(DECMPDBG_FILE_IS_COMPRESSED, vp->v_id, 0);
		ErrorLogWithPath("unknown ret %d\n", ret);
		return 0;
	}
}
912 
int
decmpfs_update_attributes(vnode_t vp, struct vnode_attr *vap)
{
	/*
	 * Sanitizes a pending setattr on vp when it tries to turn on
	 * UF_COMPRESSED: the flag is only honored if the decmpfs xattr is
	 * already present (setxattr first, then chflags), and for
	 * non-dataless types the file is truncated to zero as part of the
	 * transition.  Returns 0, or an errno from fetching the old flags.
	 */
	int error = 0;

	if (VATTR_IS_ACTIVE(vap, va_flags)) {
		/* the BSD flags are being updated */
		if (vap->va_flags & UF_COMPRESSED) {
			/* the compressed bit is being set, did it change? */
			struct vnode_attr va_fetch;
			int old_flags = 0;
			VATTR_INIT(&va_fetch);
			VATTR_WANTED(&va_fetch, va_flags);
			error = vnode_getattr(vp, &va_fetch, decmpfs_ctx);
			if (error) {
				return error;
			}

			old_flags = va_fetch.va_flags;

			if (!(old_flags & UF_COMPRESSED)) {
				/*
				 * Compression bit was turned on, make sure the file has the DECMPFS_XATTR_NAME attribute.
				 * This precludes anyone from using the UF_COMPRESSED bit for anything else, and it enforces
				 * an order of operation -- you must first do the setxattr and then the chflags.
				 */

				if (VATTR_IS_ACTIVE(vap, va_data_size)) {
					/*
					 * don't allow the caller to set the BSD flag and the size in the same call
					 * since this doesn't really make sense
					 */
					vap->va_flags &= ~UF_COMPRESSED;
					return 0;
				}

				decmpfs_header *hdr = NULL;
				size_t alloc_size = 0;
				/* returnInvalid == 1: an out-of-range type still proves the xattr exists */
				error = decmpfs_fetch_compressed_header(vp, NULL, &hdr, 1, &alloc_size);
				if (error == 0) {
					/*
					 * Allow the flag to be set since the decmpfs attribute
					 * is present.
					 *
					 * If we're creating a dataless file we do not want to
					 * truncate it to zero which allows the file resolver to
					 * have more control over when truncation should happen.
					 * All other types of compressed files are truncated to
					 * zero.
					 */
					if (!decmpfs_type_is_dataless(hdr->compression_type)) {
						VATTR_SET_ACTIVE(vap, va_data_size);
						vap->va_data_size = 0;
					}
				} else if (error == ERANGE) {
					/* the file had a decmpfs attribute but the type was out of range, so don't muck with the file's data size */
				} else {
					/* no DECMPFS_XATTR_NAME attribute, so deny the update */
					vap->va_flags &= ~UF_COMPRESSED;
				}
				if (hdr != NULL) {
					kfree_data(hdr, alloc_size);
				}
			}
		}
	}

	return 0;
}
982 
983 static int
984 wait_for_decompress(decmpfs_cnode *cp)
985 {
986 	int state;
987 	lck_mtx_lock(&decompress_channel_mtx);
988 	do {
989 		state = decmpfs_fast_get_state(cp);
990 		if (state != FILE_IS_CONVERTING) {
991 			/* file is not decompressing */
992 			lck_mtx_unlock(&decompress_channel_mtx);
993 			return state;
994 		}
995 		msleep((caddr_t)&decompress_channel, &decompress_channel_mtx, PINOD, "wait_for_decompress", NULL);
996 	} while (1);
997 }
998 
999 #pragma mark --- decmpfs hide query routines ---
1000 
1001 int
1002 decmpfs_hides_rsrc(vfs_context_t ctx, decmpfs_cnode *cp)
1003 {
1004 	/*
1005 	 *  WARNING!!!
1006 	 *  callers may (and do) pass NULL for ctx, so we should only use it
1007 	 *  for this equality comparison
1008 	 *
1009 	 *  This routine should only be called after a file has already been through decmpfs_file_is_compressed
1010 	 */
1011 
1012 	if (ctx == decmpfs_ctx) {
1013 		return 0;
1014 	}
1015 
1016 	if (!decmpfs_fast_file_is_compressed(cp)) {
1017 		return 0;
1018 	}
1019 
1020 	/* all compressed files hide their resource fork */
1021 	return 1;
1022 }
1023 
1024 int
1025 decmpfs_hides_xattr(vfs_context_t ctx, decmpfs_cnode *cp, const char *xattr)
1026 {
1027 	/*
1028 	 *  WARNING!!!
1029 	 *  callers may (and do) pass NULL for ctx, so we should only use it
1030 	 *  for this equality comparison
1031 	 *
1032 	 *  This routine should only be called after a file has already been through decmpfs_file_is_compressed
1033 	 */
1034 
1035 	if (ctx == decmpfs_ctx) {
1036 		return 0;
1037 	}
1038 	if (strncmp(xattr, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME) - 1) == 0) {
1039 		return decmpfs_hides_rsrc(ctx, cp);
1040 	}
1041 	if (!decmpfs_fast_file_is_compressed(cp)) {
1042 		/* file is not compressed, so don't hide this xattr */
1043 		return 0;
1044 	}
1045 	if (strncmp(xattr, DECMPFS_XATTR_NAME, sizeof(DECMPFS_XATTR_NAME) - 1) == 0) {
1046 		/* it's our xattr, so hide it */
1047 		return 1;
1048 	}
1049 	/* don't hide this xattr */
1050 	return 0;
1051 }
1052 
1053 #pragma mark --- registration/validation routines ---
1054 
1055 static inline int
1056 registration_valid(const decmpfs_registration *registration)
1057 {
1058 	return registration && ((registration->decmpfs_registration == DECMPFS_REGISTRATION_VERSION_V1) || (registration->decmpfs_registration == DECMPFS_REGISTRATION_VERSION_V3));
1059 }
1060 
1061 errno_t
1062 register_decmpfs_decompressor(uint32_t compression_type, const decmpfs_registration *registration)
1063 {
1064 	/* called by kexts to register decompressors */
1065 
1066 	errno_t ret = 0;
1067 	int locked = 0;
1068 	char resourceName[80];
1069 
1070 	if ((compression_type >= CMP_MAX) || !registration_valid(registration)) {
1071 		ret = EINVAL;
1072 		goto out;
1073 	}
1074 
1075 	lck_rw_lock_exclusive(&decompressorsLock); locked = 1;
1076 
1077 	/* make sure the registration for this type is zero */
1078 	if (decompressors[compression_type] != NULL) {
1079 		ret = EEXIST;
1080 		goto out;
1081 	}
1082 	decompressors[compression_type] = registration;
1083 	snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", compression_type);
1084 	IOServicePublishResource(resourceName, TRUE);
1085 
1086 out:
1087 	if (locked) {
1088 		lck_rw_unlock_exclusive(&decompressorsLock);
1089 	}
1090 	return ret;
1091 }
1092 
1093 errno_t
1094 unregister_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration *registration)
1095 {
1096 	/* called by kexts to unregister decompressors */
1097 
1098 	errno_t ret = 0;
1099 	int locked = 0;
1100 	char resourceName[80];
1101 
1102 	if ((compression_type >= CMP_MAX) || !registration_valid(registration)) {
1103 		ret = EINVAL;
1104 		goto out;
1105 	}
1106 
1107 	lck_rw_lock_exclusive(&decompressorsLock); locked = 1;
1108 	if (decompressors[compression_type] != registration) {
1109 		ret = EEXIST;
1110 		goto out;
1111 	}
1112 	decompressors[compression_type] = NULL;
1113 	snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", compression_type);
1114 	IOServicePublishResource(resourceName, FALSE);
1115 
1116 out:
1117 	if (locked) {
1118 		lck_rw_unlock_exclusive(&decompressorsLock);
1119 	}
1120 	return ret;
1121 }
1122 
1123 static int
1124 compression_type_valid(vnode_t vp, decmpfs_header *hdr)
1125 {
1126 	/* fast pre-check to determine if the given compressor has checked in */
1127 	int ret = 0;
1128 
1129 	/* every compressor must have at least a fetch function */
1130 	lck_rw_lock_shared(&decompressorsLock);
1131 	if (decmp_get_func(vp, hdr->compression_type, fetch) != NULL) {
1132 		ret = 1;
1133 	}
1134 	lck_rw_unlock_shared(&decompressorsLock);
1135 
1136 	return ret;
1137 }
1138 
1139 #pragma mark --- compression/decompression routines ---
1140 
/*
 * Fill @vec with uncompressed bytes for the region [@offset, @offset + @size)
 * of @vp by calling out to the registered compressor's fetch callback.
 * On return, *bytes_read holds the number of bytes actually produced.
 * Returns 0 on success (past-EOF and zero-length reads are not errors),
 * EINVAL for a negative offset, ENOTSUP when no fetch function is
 * registered for hdr->compression_type, or the fetch callback's error.
 */
static int
decmpfs_fetch_uncompressed_data(vnode_t vp, decmpfs_cnode *cp, decmpfs_header *hdr, off_t offset, user_ssize_t size, int nvec, decmpfs_vector *vec, uint64_t *bytes_read)
{
	/* get the uncompressed bytes for the specified region of vp by calling out to the registered compressor */

	int err          = 0;

	*bytes_read = 0;

	if (offset >= (off_t)hdr->uncompressed_size) {
		/* reading past end of file; nothing to do */
		err = 0;
		goto out;
	}
	if (offset < 0) {
		/* tried to read from before start of file */
		err = EINVAL;
		goto out;
	}
	if (hdr->uncompressed_size - offset < size) {
		/* adjust size so we don't read past the end of the file */
		size = (user_ssize_t)(hdr->uncompressed_size - offset);
	}
	if (size == 0) {
		/* nothing to read */
		err = 0;
		goto out;
	}

	/*
	 * Trace the following parameters on entry with event-id 0x03120008.
	 *
	 * @vp->v_id:       vnode-id of the file being decompressed.
	 * @hdr->compression_type: compression type.
	 * @offset:         offset from where to fetch uncompressed data.
	 * @size:           amount of uncompressed data to fetch.
	 *
	 * Please NOTE: @offset and @size can overflow in theory but
	 * here it is safe.
	 */
	DECMPFS_EMIT_TRACE_ENTRY(DECMPDBG_FETCH_UNCOMPRESSED_DATA, vp->v_id,
	    hdr->compression_type, (int)offset, (int)size);
	/*
	 * Hold the decompressors lock shared across the fetch callback so the
	 * compressor cannot be unregistered while it is running.  Note the lock
	 * is released on both sides of the branch before any further work.
	 */
	lck_rw_lock_shared(&decompressorsLock);
	decmpfs_fetch_uncompressed_data_func fetch = decmp_get_func(vp, hdr->compression_type, fetch);
	if (fetch) {
		err = fetch(vp, decmpfs_ctx, hdr, offset, size, nvec, vec, bytes_read);
		lck_rw_unlock_shared(&decompressorsLock);
		if (err == 0) {
			uint64_t decompression_flags = decmpfs_cnode_get_decompression_flags(cp);
			if (decompression_flags & DECMPFS_FLAGS_FORCE_FLUSH_ON_DECOMPRESS) {
#if     !defined(__i386__) && !defined(__x86_64__)
				/* non-x86 only: flush the data cache over each output vector when the compressor requests it */
				int i;
				for (i = 0; i < nvec; i++) {
					assert(vec[i].size >= 0 && vec[i].size <= UINT_MAX);
					flush_dcache64((addr64_t)(uintptr_t)vec[i].buf, (unsigned int)vec[i].size, FALSE);
				}
#endif
			}
		}
	} else {
		/* no fetch function registered for this compression type */
		err = ENOTSUP;
		lck_rw_unlock_shared(&decompressorsLock);
	}
	/*
	 * Trace the following parameters on return with event-id 0x03120008.
	 *
	 * @vp->v_id:       vnode-id of the file being decompressed.
	 * @bytes_read:     amount of uncompressed bytes fetched in bytes.
	 * @err:            value returned from this function.
	 *
	 * Please NOTE: @bytes_read can overflow in theory but here it is safe.
	 */
	DECMPFS_EMIT_TRACE_RETURN(DECMPDBG_FETCH_UNCOMPRESSED_DATA, vp->v_id,
	    (int)*bytes_read, err);
out:
	return err;
}
1218 
1219 static kern_return_t
1220 commit_upl(upl_t upl, upl_offset_t pl_offset, size_t uplSize, int flags, int abort)
1221 {
1222 	kern_return_t kr = 0;
1223 
1224 #if CONFIG_IOSCHED
1225 	upl_unmark_decmp(upl);
1226 #endif /* CONFIG_IOSCHED */
1227 
1228 	/* commit the upl pages */
1229 	if (abort) {
1230 		VerboseLog("aborting upl, flags 0x%08x\n", flags);
1231 		kr = ubc_upl_abort_range(upl, pl_offset, (upl_size_t)uplSize, flags);
1232 		if (kr != KERN_SUCCESS) {
1233 			ErrorLog("ubc_upl_abort_range error %d\n", (int)kr);
1234 		}
1235 	} else {
1236 		VerboseLog("committing upl, flags 0x%08x\n", flags | UPL_COMMIT_CLEAR_DIRTY);
1237 		kr = ubc_upl_commit_range(upl, pl_offset, (upl_size_t)uplSize, flags | UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_WRITTEN_BY_KERNEL);
1238 		if (kr != KERN_SUCCESS) {
1239 			ErrorLog("ubc_upl_commit_range error %d\n", (int)kr);
1240 		}
1241 	}
1242 	return kr;
1243 }
1244 
1245 
/*
 * Handle a VNOP_PAGEIN for a compressed file: validate the request, map the
 * caller's UPL, decompress the requested region into it via the registered
 * compressor, optionally verify the result through VNOP_VERIFY, and commit
 * (or abort) the UPL pages.  If the file turns out to have been decompressed
 * while we were working, *is_compressed is cleared so the caller can fall
 * back to the normal pagein path.  Returns EAGAIN if the compressed-data
 * lock cannot be taken without blocking.
 */
errno_t
decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmpfs_cnode *cp)
{
	/* handles a page-in request from vfs for a compressed file */

	int err                      = 0;
	vnode_t vp                   = ap->a_vp;
	upl_t pl                     = ap->a_pl;
	upl_offset_t pl_offset       = ap->a_pl_offset;
	off_t f_offset               = ap->a_f_offset;
	size_t size                  = ap->a_size;
	int flags                    = ap->a_flags;
	off_t uplPos                 = 0;
	user_ssize_t uplSize         = 0;
	user_ssize_t rounded_uplSize = 0;
	size_t verify_block_size     = 0;
	void *data                   = NULL;
	decmpfs_header *hdr = NULL;
	size_t alloc_size            = 0;
	uint64_t cachedSize          = 0;
	uint32_t fs_bsize            = 0;
	int cmpdata_locked           = 0;
	int  num_valid_pages         = 0;
	int  num_invalid_pages       = 0;
	bool file_tail_page_valid    = false;

	/* trylock: pagein may be called in contexts where blocking here could deadlock */
	if (!decmpfs_trylock_compressed_data(cp, 0)) {
		return EAGAIN;
	}
	cmpdata_locked = 1;


	if (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)) {
		DebugLogWithPath("pagein: unknown flags 0x%08x\n", (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)));
	}

	err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0, &alloc_size);
	if (err != 0) {
		goto out;
	}

	cachedSize = hdr->uncompressed_size;

	if (!compression_type_valid(vp, hdr)) {
		/* compressor not registered */
		err = ENOTSUP;
		goto out;
	}

	/*
	 * can't page-in from a negative offset
	 * or if we're starting beyond the EOF
	 * or if the file offset isn't page aligned
	 * or the size requested isn't a multiple of PAGE_SIZE
	 */
	if (f_offset < 0 || f_offset >= cachedSize ||
	    (f_offset & PAGE_MASK_64) || (size & PAGE_MASK) || (pl_offset & PAGE_MASK)) {
#if 0
		/* There should be a decmpfs equivalent of this cluster_pagein call */
		if (f_offset >= cachedSize) {
			kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_CLUSTER, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_CL_PGIN_PAST_EOF), 0 /* arg */);
		}
#endif
		err = EINVAL;
		goto out;
	}

	/*
	 * If the verify block size is larger than the page size, the UPL needs
	 * to be aligned to it, Since the UPL has been created by the filesystem,
	 * we will only check if the passed in UPL length conforms to the
	 * alignment requirements.
	 */
	err = VNOP_VERIFY(vp, f_offset, NULL, 0, &verify_block_size, NULL,
	    VNODE_VERIFY_DEFAULT, NULL);
	if (err) {
		ErrorLogWithPath("VNOP_VERIFY returned error = %d\n", err);
		goto out;
	} else if (verify_block_size) {
		if (vp->v_mount->mnt_vfsstat.f_bsize > PAGE_SIZE) {
			fs_bsize = vp->v_mount->mnt_vfsstat.f_bsize;
		}
		if (verify_block_size & (verify_block_size - 1)) {
			ErrorLogWithPath("verify block size (%zu) is not power of 2, no verification will be done\n", verify_block_size);
			err = EINVAL;
		} else if (size % verify_block_size) {
			ErrorLogWithPath("upl size (%zu) is not a multiple of verify block size (%zu)\n", (size_t)size, verify_block_size);
			err = EINVAL;
		} else if (fs_bsize) {
			/*
			 * Filesystems requesting verification have to provide
			 * values for block sizes which are powers of 2.
			 */
			if (fs_bsize & (fs_bsize - 1)) {
				ErrorLogWithPath("FS block size (%u) is greater than PAGE_SIZE (%d) and is not power of 2, no verification will be done\n",
				    fs_bsize, PAGE_SIZE);
				err = EINVAL;
			} else if (fs_bsize > verify_block_size) {
				ErrorLogWithPath("FS block size (%u) is greater than verify block size (%zu), no verification will be done\n",
				    fs_bsize, verify_block_size);
				err = EINVAL;
			}
		}
		if (err) {
			goto out;
		}
	}

#if CONFIG_IOSCHED
	/* Mark the UPL as the requesting UPL for decompression */
	upl_mark_decmp(pl);
#endif /* CONFIG_IOSCHED */

	/* map the upl so we can fetch into it */
	kern_return_t kr = ubc_upl_map(pl, (vm_offset_t*)&data);
	if ((kr != KERN_SUCCESS) || (data == NULL)) {
		err = ENOSPC;
		data = NULL;
#if CONFIG_IOSCHED
		upl_unmark_decmp(pl);
#endif /* CONFIG_IOSCHED */
		goto out;
	}

	uplPos = f_offset;
	off_t max_size = cachedSize - f_offset;

	/*
	 * uplSize is the number of meaningful file bytes covered by the UPL;
	 * rounded_uplSize is that amount rounded up (fs block then page) and
	 * is what actually gets committed.  Anything in the UPL beyond
	 * rounded_uplSize is aborted at the end.
	 */
	if (size < max_size) {
		rounded_uplSize = uplSize = size;
		file_tail_page_valid = true;
	} else {
		uplSize = (user_ssize_t)max_size;
		if (fs_bsize) {
			/* First round up to fs_bsize */
			rounded_uplSize = (uplSize + (fs_bsize - 1)) & ~(fs_bsize - 1);
			/* then to PAGE_SIZE */
			rounded_uplSize = MIN(size, round_page((vm_offset_t)rounded_uplSize));
		} else {
			rounded_uplSize = round_page((vm_offset_t)uplSize);
		}
	}

	/* do the fetch */
	decmpfs_vector vec;

decompress:
	/* the mapped data pointer points to the first page of the page list, so we want to start filling in at an offset of pl_offset */
	vec = (decmpfs_vector) {
		.buf = (char*)data + pl_offset,
		.size = size,
	};

	uint64_t did_read = 0;
	if (decmpfs_fast_get_state(cp) == FILE_IS_CONVERTING) {
		ErrorLogWithPath("unexpected pagein during decompress\n");
		/*
		 *  if the file is converting, this must be a recursive call to pagein from underneath a call to decmpfs_decompress_file;
		 *  pretend that it succeeded but don't do anything since we're just going to write over the pages anyway
		 */
		err = 0;
	} else {
		if (verify_block_size <= PAGE_SIZE) {
			err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, uplPos, uplSize, 1, &vec, &did_read);
			/* zero out whatever wasn't read */
			if (did_read < rounded_uplSize) {
				memset((char*)vec.buf + did_read, 0, (size_t)(rounded_uplSize - did_read));
			}
		} else {
			off_t l_uplPos = uplPos;
			off_t l_pl_offset = pl_offset;
			user_ssize_t l_uplSize = uplSize;
			upl_page_info_t *pl_info = ubc_upl_pageinfo(pl);

			err = 0;
			/*
			 * When the system page size is less than the "verify block size",
			 * the UPL passed may not consist solely of absent pages.
			 * We have to detect the "absent" pages and only decompress
			 * into those absent/invalid page ranges.
			 *
			 * Things that will change in each iteration of the loop :
			 *
			 * l_pl_offset = where we are inside the UPL [0, caller_upl_created_size)
			 * l_uplPos = the file offset the l_pl_offset corresponds to.
			 * l_uplSize = the size of the upl still unprocessed;
			 *
			 * In this picture, we have to do the transfer on 2 ranges
			 * (One 2 page range and one 3 page range) and the loop
			 * below will skip the first two pages and then identify
			 * the next two as invalid and fill those in and
			 * then skip the next one and then do the last pages.
			 *
			 *                          uplPos(file_offset)
			 *                            |   uplSize
			 * 0                          V<-------------->    file_size
			 * |--------------------------------------------------->
			 *                        | | |V|V|I|I|V|I|I|I|
			 *                            ^
			 *                            |    upl
			 *                        <------------------->
			 *                            |
			 *                          pl_offset
			 *
			 * uplSize will be clipped in case the UPL range exceeds
			 * the file size.
			 *
			 */
			while (l_uplSize) {
				uint64_t l_did_read = 0;
				int pl_offset_pg = (int)(l_pl_offset / PAGE_SIZE);
				int pages_left_in_upl;
				int start_pg;
				int last_pg;

				/*
				 * l_uplSize may start off less than the size of the upl,
				 * we have to round it up to PAGE_SIZE to calculate
				 * how many more pages are left.
				 */
				pages_left_in_upl = (int)(round_page((vm_offset_t)l_uplSize) / PAGE_SIZE);

				/*
				 * scan from the beginning of the upl looking for the first
				 * non-valid page.... this will become the first page in
				 * the request we're going to make to
				 * 'decmpfs_fetch_uncompressed_data'... if all
				 * of the pages are valid, we won't call through
				 * to 'decmpfs_fetch_uncompressed_data'
				 */
				for (start_pg = 0; start_pg < pages_left_in_upl; start_pg++) {
					if (!upl_valid_page(pl_info, pl_offset_pg + start_pg)) {
						break;
					}
				}

				num_valid_pages += start_pg;

				/*
				 * scan from the starting invalid page looking for
				 * a valid page before the end of the upl is
				 * reached, if we find one, then it will be the
				 * last page of the request to 'decmpfs_fetch_uncompressed_data'
				 */
				for (last_pg = start_pg; last_pg < pages_left_in_upl; last_pg++) {
					if (upl_valid_page(pl_info, pl_offset_pg + last_pg)) {
						break;
					}
				}

				if (start_pg < last_pg) {
					off_t inval_offset = start_pg * PAGE_SIZE;
					int inval_pages = last_pg - start_pg;
					int inval_size = inval_pages * PAGE_SIZE;
					decmpfs_vector l_vec;

					num_invalid_pages += inval_pages;
					if (inval_offset) {
						/* skip over the leading run of valid pages */
						did_read += inval_offset;
						l_pl_offset += inval_offset;
						l_uplPos += inval_offset;
						l_uplSize -= inval_offset;
					}

					l_vec = (decmpfs_vector) {
						.buf = (char*)data + l_pl_offset,
						.size = inval_size,
					};

					err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, l_uplPos,
					    MIN(l_uplSize, inval_size), 1, &l_vec, &l_did_read);

					if (!err && (l_did_read != inval_size) && (l_uplSize > inval_size)) {
						ErrorLogWithPath("Unexpected size fetch of decompressed data, l_uplSize = %d, l_did_read = %d, inval_size = %d\n",
						    (int)l_uplSize, (int)l_did_read, (int)inval_size);
						err = EINVAL;
					}
				} else {
					/* no invalid pages left */
					l_did_read = l_uplSize;
					if (!file_tail_page_valid) {
						file_tail_page_valid = true;
					}
				}

				if (err) {
					break;
				}

				did_read += l_did_read;
				l_pl_offset += l_did_read;
				l_uplPos += l_did_read;
				l_uplSize -= l_did_read;
			}

			/* Zero out the region after EOF in the last page (if needed) */
			if (!err && !file_tail_page_valid && (uplSize < rounded_uplSize)) {
				memset((char*)vec.buf + uplSize, 0, (size_t)(rounded_uplSize - uplSize));
			}
		}
	}
	if (err) {
		DebugLogWithPath("decmpfs_fetch_uncompressed_data err %d\n", err);
		int cmp_state = decmpfs_fast_get_state(cp);
		if (cmp_state == FILE_IS_CONVERTING) {
			DebugLogWithPath("cmp_state == FILE_IS_CONVERTING\n");
			cmp_state = wait_for_decompress(cp);
			if (cmp_state == FILE_IS_COMPRESSED) {
				DebugLogWithPath("cmp_state == FILE_IS_COMPRESSED\n");
				/* a decompress was attempted but it failed, let's try calling fetch again */
				goto decompress;
			}
		}
		if (cmp_state == FILE_IS_NOT_COMPRESSED) {
			DebugLogWithPath("cmp_state == FILE_IS_NOT_COMPRESSED\n");
			/* the file was decompressed after we started reading it */
			*is_compressed = 0; /* instruct caller to fall back to its normal path */
		}
	}

	/* run the decompressed bytes through the filesystem's verifier, if it has one */
	if (!err && verify_block_size) {
		size_t cur_verify_block_size = verify_block_size;

		if ((err = VNOP_VERIFY(vp, uplPos, vec.buf, rounded_uplSize, &cur_verify_block_size, NULL, 0, NULL))) {
			ErrorLogWithPath("Verification failed with error %d, uplPos = %lld, uplSize = %d, did_read = %d, valid_pages = %d, invalid_pages = %d, tail_page_valid = %d\n",
			    err, (long long)uplPos, (int)rounded_uplSize, (int)did_read, num_valid_pages, num_invalid_pages, file_tail_page_valid);
		}
		/* XXX : If the verify block size changes, redo the read */
	}

#if CONFIG_IOSCHED
	upl_unmark_decmp(pl);
#endif /* CONFIG_IOSCHED */

	kr = ubc_upl_unmap(pl); data = NULL; /* make sure to set data to NULL so we don't try to unmap again below */
	if (kr != KERN_SUCCESS) {
		ErrorLogWithPath("ubc_upl_unmap error %d\n", (int)kr);
	} else {
		if (!err) {
			/* commit our pages */
			kr = commit_upl(pl, pl_offset, (size_t)rounded_uplSize, UPL_COMMIT_FREE_ON_EMPTY, 0 /* commit */);
			/* If there were any pages after the page containing EOF, abort them. */
			if (rounded_uplSize < size) {
				kr = commit_upl(pl, (upl_offset_t)(pl_offset + rounded_uplSize), (size_t)(size - rounded_uplSize),
				    UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR, 1 /* abort */);
			}
		}
	}

out:
	if (data) {
		ubc_upl_unmap(pl);
	}
	if (hdr != NULL) {
		kfree_data(hdr, alloc_size);
	}
	if (cmpdata_locked) {
		decmpfs_unlock_compressed_data(cp, 0);
	}
	if (err) {
#if 0
		if (err != ENXIO && err != ENOSPC) {
			char *path = zalloc(ZV_NAMEI);
			panic("%s: decmpfs_pagein_compressed: err %d", vnpath(vp, path, PATH_MAX), err);
			zfree(ZV_NAMEI, path);
		}
#endif /* 0 */
		ErrorLogWithPath("err %d\n", err);
	}
	return err;
}
1616 
/*
 * Handle a VNOP_READ for a compressed file: let the UBC satisfy whatever it
 * already caches, then decompress the remainder in MAX_UPL_SIZE_BYTES-sized
 * UPL chunks, copying each chunk out to the caller's uio and committing the
 * pages so they stay cached.  If the file is converted to uncompressed
 * while we work, *is_compressed is cleared so the caller can fall back to
 * its normal read path.
 */
errno_t
decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_cnode *cp)
{
	/* handles a read request from vfs for a compressed file */

	uio_t uio                    = ap->a_uio;
	vnode_t vp                   = ap->a_vp;
	int err                      = 0;
	int countInt                 = 0;
	off_t uplPos                 = 0;
	user_ssize_t uplSize         = 0;
	user_ssize_t uplRemaining    = 0;
	off_t curUplPos              = 0;
	user_ssize_t curUplSize      = 0;
	kern_return_t kr             = KERN_SUCCESS;
	int abort_read               = 0;
	void *data                   = NULL;
	uint64_t did_read            = 0;
	upl_t upl                    = NULL;
	upl_page_info_t *pli         = NULL;
	decmpfs_header *hdr          = NULL;
	size_t alloc_size            = 0;
	uint64_t cachedSize          = 0;
	off_t uioPos                 = 0;
	user_ssize_t uioRemaining    = 0;
	size_t verify_block_size     = 0;
	size_t alignment_size        = PAGE_SIZE;
	int cmpdata_locked           = 0;

	decmpfs_lock_compressed_data(cp, 0); cmpdata_locked = 1;

	uplPos = uio_offset(uio);
	uplSize = uio_resid(uio);
	VerboseLogWithPath("uplPos %lld uplSize %lld\n", uplPos, uplSize);

	cachedSize = decmpfs_cnode_get_vnode_cached_size(cp);

	if ((uint64_t)uplPos + uplSize > cachedSize) {
		/* truncate the read to the size of the file */
		uplSize = (user_ssize_t)(cachedSize - uplPos);
	}

	/* give the cluster layer a chance to fill in whatever it already has */
	countInt = (uplSize > INT_MAX) ? INT_MAX : (int)uplSize;
	err = cluster_copy_ubc_data(vp, uio, &countInt, 0);
	if (err != 0) {
		goto out;
	}

	/* figure out what's left */
	uioPos = uio_offset(uio);
	uioRemaining = uio_resid(uio);
	if ((uint64_t)uioPos + uioRemaining > cachedSize) {
		/* truncate the read to the size of the file */
		uioRemaining = (user_ssize_t)(cachedSize - uioPos);
	}

	if (uioRemaining <= 0) {
		/* nothing left */
		goto out;
	}

	err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0, &alloc_size);
	if (err != 0) {
		goto out;
	}
	if (!compression_type_valid(vp, hdr)) {
		/* compressor not registered */
		err = ENOTSUP;
		goto out;
	}

	uplPos = uioPos;
	uplSize = uioRemaining;
#if COMPRESSION_DEBUG
	DebugLogWithPath("uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
#endif

	lck_rw_lock_shared(&decompressorsLock);
	decmpfs_adjust_fetch_region_func adjust_fetch = decmp_get_func(vp, hdr->compression_type, adjust_fetch);
	if (adjust_fetch) {
		/* give the compressor a chance to adjust the portion of the file that we read */
		adjust_fetch(vp, decmpfs_ctx, hdr, &uplPos, &uplSize);
		VerboseLogWithPath("adjusted uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
	}
	lck_rw_unlock_shared(&decompressorsLock);

	/* clip the adjusted size to the size of the file */
	if ((uint64_t)uplPos + uplSize > cachedSize) {
		/* truncate the read to the size of the file */
		uplSize = (user_ssize_t)(cachedSize - uplPos);
	}

	if (uplSize <= 0) {
		/* nothing left */
		goto out;
	}

	/*
	 *  since we're going to create a upl for the given region of the file,
	 *  make sure we're on page boundaries
	 */

	/* If the verify block size is larger than the page size, the UPL needs to aligned to it */
	err = VNOP_VERIFY(vp, uplPos, NULL, 0, &verify_block_size, NULL, VNODE_VERIFY_DEFAULT, NULL);
	if (err) {
		goto out;
	} else if (verify_block_size) {
		if (verify_block_size & (verify_block_size - 1)) {
			ErrorLogWithPath("verify block size is not power of 2, no verification will be done\n");
			verify_block_size = 0;
		} else if (verify_block_size > PAGE_SIZE) {
			alignment_size = verify_block_size;
		}
	}

	if (uplPos & (alignment_size - 1)) {
		/* round position down to alignment boundary, growing the size to compensate */
		uplSize += (uplPos & (alignment_size - 1));
		uplPos &= ~(alignment_size - 1);
	}

	/* round size up to alignment_size multiple */
	uplSize = (uplSize + (alignment_size - 1)) & ~(alignment_size - 1);

	VerboseLogWithPath("new uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);

	uplRemaining = uplSize;
	curUplPos = uplPos;
	curUplSize = 0;

	/* process the region one UPL chunk at a time */
	while (uplRemaining > 0) {
		/* start after the last upl */
		curUplPos += curUplSize;

		/* clip to max upl size */
		curUplSize = uplRemaining;
		if (curUplSize > MAX_UPL_SIZE_BYTES) {
			curUplSize = MAX_UPL_SIZE_BYTES;
		}

		/* create the upl */
		kr = ubc_create_upl_kernel(vp, curUplPos, (int)curUplSize, &upl, &pli, UPL_SET_LITE, VM_KERN_MEMORY_FILE);
		if (kr != KERN_SUCCESS) {
			ErrorLogWithPath("ubc_create_upl error %d\n", (int)kr);
			err = EINVAL;
			goto out;
		}
		VerboseLogWithPath("curUplPos %lld curUplSize %lld\n", (uint64_t)curUplPos, (uint64_t)curUplSize);

#if CONFIG_IOSCHED
		/* Mark the UPL as the requesting UPL for decompression */
		upl_mark_decmp(upl);
#endif /* CONFIG_IOSCHED */

		/* map the upl */
		kr = ubc_upl_map(upl, (vm_offset_t*)&data);
		if (kr != KERN_SUCCESS) {
			commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1);
#if 0
			char *path = zalloc(ZV_NAMEI);
			panic("%s: decmpfs_read_compressed: ubc_upl_map error %d", vnpath(vp, path, PATH_MAX), (int)kr);
			zfree(ZV_NAMEI, path);
#else /* 0 */
			ErrorLogWithPath("ubc_upl_map kr=0x%x\n", (int)kr);
#endif /* 0 */
			err = EINVAL;
			goto out;
		}

		/* make sure the map succeeded */
		if (!data) {
			commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1);

			ErrorLogWithPath("ubc_upl_map mapped null\n");
			err = EINVAL;
			goto out;
		}

		/* fetch uncompressed data into the mapped upl */
		decmpfs_vector vec;
decompress:
		vec = (decmpfs_vector){ .buf = data, .size = curUplSize };
		err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, curUplPos, curUplSize, 1, &vec, &did_read);
		if (err) {
			ErrorLogWithPath("decmpfs_fetch_uncompressed_data err %d\n", err);

			/* maybe the file is converting to decompressed */
			int cmp_state = decmpfs_fast_get_state(cp);
			if (cmp_state == FILE_IS_CONVERTING) {
				ErrorLogWithPath("cmp_state == FILE_IS_CONVERTING\n");
				cmp_state = wait_for_decompress(cp);
				if (cmp_state == FILE_IS_COMPRESSED) {
					ErrorLogWithPath("cmp_state == FILE_IS_COMPRESSED\n");
					/* a decompress was attempted but it failed, let's try fetching again */
					goto decompress;
				}
			}
			if (cmp_state == FILE_IS_NOT_COMPRESSED) {
				ErrorLogWithPath("cmp_state == FILE_IS_NOT_COMPRESSED\n");
				/* the file was decompressed after we started reading it */
				abort_read = 1; /* we're not going to commit our data */
				*is_compressed = 0; /* instruct caller to fall back to its normal path */
			}
			kr = KERN_FAILURE;
			did_read = 0;
		}

		/* zero out the remainder of the last page */
		memset((char*)data + did_read, 0, (size_t)(curUplSize - did_read));
		/* run the decompressed bytes through the filesystem's verifier, if it has one */
		if (!err && verify_block_size) {
			size_t cur_verify_block_size = verify_block_size;

			if ((err = VNOP_VERIFY(vp, curUplPos, data, curUplSize, &cur_verify_block_size, NULL, 0, NULL))) {
				ErrorLogWithPath("Verification failed with error %d\n", err);
				abort_read = 1;
			}
			/* XXX : If the verify block size changes, redo the read */
		}

		kr = ubc_upl_unmap(upl);
		if (kr == KERN_SUCCESS) {
			if (abort_read) {
				kr = commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1);
			} else {
				VerboseLogWithPath("uioPos %lld uioRemaining %lld\n", (uint64_t)uioPos, (uint64_t)uioRemaining);
				if (uioRemaining) {
					/* copy the freshly-decompressed chunk out to the caller's uio */
					off_t uplOff = uioPos - curUplPos;
					if (uplOff < 0) {
						ErrorLogWithPath("uplOff %lld should never be negative\n", (int64_t)uplOff);
						err = EINVAL;
					} else if (uplOff > INT_MAX) {
						ErrorLogWithPath("uplOff %lld too large\n", (int64_t)uplOff);
						err = EINVAL;
					} else {
						off_t count = curUplPos + curUplSize - uioPos;
						if (count < 0) {
							/* this upl is entirely before the uio */
						} else {
							if (count > uioRemaining) {
								count = uioRemaining;
							}
							int icount = (count > INT_MAX) ? INT_MAX : (int)count;
							int io_resid = icount;
							err = cluster_copy_upl_data(uio, upl, (int)uplOff, &io_resid);
							int copied = icount - io_resid;
							VerboseLogWithPath("uplOff %lld count %lld copied %lld\n", (uint64_t)uplOff, (uint64_t)count, (uint64_t)copied);
							if (err) {
								ErrorLogWithPath("cluster_copy_upl_data err %d\n", err);
							}
							uioPos += copied;
							uioRemaining -= copied;
						}
					}
				}
				kr = commit_upl(upl, 0, curUplSize, UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE, 0);
				if (err) {
					goto out;
				}
			}
		} else {
			ErrorLogWithPath("ubc_upl_unmap error %d\n", (int)kr);
		}

		uplRemaining -= curUplSize;
	}

out:

	if (hdr != NULL) {
		kfree_data(hdr, alloc_size);
	}
	if (cmpdata_locked) {
		decmpfs_unlock_compressed_data(cp, 0);
	}
	if (err) {/* something went wrong */
		ErrorLogWithPath("err %d\n", err);
		return err;
	}

#if COMPRESSION_DEBUG
	uplSize = uio_resid(uio);
	if (uplSize) {
		VerboseLogWithPath("still %lld bytes to copy\n", uplSize);
	}
#endif
	return 0;
}
1904 
1905 int
1906 decmpfs_free_compressed_data(vnode_t vp, decmpfs_cnode *cp)
1907 {
1908 	/*
1909 	 *  call out to the decompressor to free remove any data associated with this compressed file
1910 	 *  then delete the file's compression xattr
1911 	 */
1912 	decmpfs_header *hdr = NULL;
1913 	size_t alloc_size = 0;
1914 
1915 	/*
1916 	 * Trace the following parameters on entry with event-id 0x03120010.
1917 	 *
1918 	 * @vp->v_id:       vnode-id of the file for which to free compressed data.
1919 	 */
1920 	DECMPFS_EMIT_TRACE_ENTRY(DECMPDBG_FREE_COMPRESSED_DATA, vp->v_id);
1921 
1922 	int err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0, &alloc_size);
1923 	if (err) {
1924 		ErrorLogWithPath("decmpfs_fetch_compressed_header err %d\n", err);
1925 	} else {
1926 		lck_rw_lock_shared(&decompressorsLock);
1927 		decmpfs_free_compressed_data_func free_data = decmp_get_func(vp, hdr->compression_type, free_data);
1928 		if (free_data) {
1929 			err = free_data(vp, decmpfs_ctx, hdr);
1930 		} else {
1931 			/* nothing to do, so no error */
1932 			err = 0;
1933 		}
1934 		lck_rw_unlock_shared(&decompressorsLock);
1935 
1936 		if (err != 0) {
1937 			ErrorLogWithPath("decompressor err %d\n", err);
1938 		}
1939 	}
1940 	/*
1941 	 * Trace the following parameters on return with event-id 0x03120010.
1942 	 *
1943 	 * @vp->v_id:       vnode-id of the file for which to free compressed data.
1944 	 * @err:            value returned from this function.
1945 	 */
1946 	DECMPFS_EMIT_TRACE_RETURN(DECMPDBG_FREE_COMPRESSED_DATA, vp->v_id, err);
1947 
1948 	/* delete the xattr */
1949 	err = vn_removexattr(vp, DECMPFS_XATTR_NAME, 0, decmpfs_ctx);
1950 
1951 	if (hdr != NULL) {
1952 		kfree_data(hdr, alloc_size);
1953 	}
1954 	return err;
1955 }
1956 
1957 #pragma mark --- file conversion routines ---
1958 
1959 static int
1960 unset_compressed_flag(vnode_t vp)
1961 {
1962 	int err = 0;
1963 	struct vnode_attr va;
1964 	struct fsioc_cas_bsdflags cas;
1965 	int i;
1966 
1967 # define MAX_CAS_BSDFLAGS_LOOPS 4
1968 	/* UF_COMPRESSED should be manipulated only with FSIOC_CAS_BSDFLAGS */
1969 	for (i = 0; i < MAX_CAS_BSDFLAGS_LOOPS; i++) {
1970 		VATTR_INIT(&va);
1971 		VATTR_WANTED(&va, va_flags);
1972 		err = vnode_getattr(vp, &va, decmpfs_ctx);
1973 		if (err != 0) {
1974 			ErrorLogWithPath("vnode_getattr err %d, num retries %d\n", err, i);
1975 			goto out;
1976 		}
1977 
1978 		cas.expected_flags = va.va_flags;
1979 		cas.new_flags = va.va_flags & ~UF_COMPRESSED;
1980 		err = VNOP_IOCTL(vp, FSIOC_CAS_BSDFLAGS, (caddr_t)&cas, FWRITE, decmpfs_ctx);
1981 
1982 		if ((err == 0) && (va.va_flags == cas.actual_flags)) {
1983 			goto out;
1984 		}
1985 
1986 		if ((err != 0) && (err != EAGAIN)) {
1987 			break;
1988 		}
1989 	}
1990 
1991 	/* fallback to regular chflags if FSIOC_CAS_BSDFLAGS is not supported */
1992 	if (err == ENOTTY) {
1993 		VATTR_INIT(&va);
1994 		VATTR_SET(&va, va_flags, cas.new_flags);
1995 		err = vnode_setattr(vp, &va, decmpfs_ctx);
1996 		if (err != 0) {
1997 			ErrorLogWithPath("vnode_setattr err %d\n", err);
1998 		}
1999 	} else if (va.va_flags != cas.actual_flags) {
2000 		ErrorLogWithPath("FSIOC_CAS_BSDFLAGS err: flags mismatc. actual (%x) expected (%x), num retries %d\n", cas.actual_flags, va.va_flags, i);
2001 	} else if (err != 0) {
2002 		ErrorLogWithPath("FSIOC_CAS_BSDFLAGS err %d, num retries %d\n", err, i);
2003 	}
2004 
2005 out:
2006 	return err;
2007 }
2008 
/*
 * Convert a compressed file into a regular (decompressed) file: stream the
 * uncompressed bytes out of the registered decompressor, write them into the
 * data fork, then clear UF_COMPRESSED and delete the compression xattr.
 *
 * @vp:             vnode of the file being decompressed.
 * @cp:             decmpfs cnode holding this file's compression state.
 * @toSize:         decompress this many leading bytes; -1 means the entire
 *                  file (hdr->uncompressed_size), 0 means truncate-to-zero.
 * @truncate_okay:  if a write fails partway, it is OK to truncate the data
 *                  fork back to zero bytes.
 * @skiplock:       caller already holds the compressed-data lock
 *                  exclusively, so don't take it here.
 *
 * Returns 0 on success or an errno-style error.  On any failure the cnode
 * state is reset to FILE_TYPE_UNKNOWN and cached pages are invalidated.
 */
int
decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncate_okay, int skiplock)
{
	/* convert a compressed file to an uncompressed file */

	int err                      = 0;
	char *data                   = NULL;   /* staging buffer for decompressed chunks */
	uio_t uio_w                  = 0;      /* reusable uio for VNOP_WRITE */
	off_t offset                 = 0;      /* bytes successfully written so far */
	uint32_t old_state           = 0;
	uint32_t new_state           = 0;
	int update_file_state        = 0;
	size_t allocSize             = 0;
	decmpfs_header *hdr          = NULL;
	size_t hdr_size              = 0;
	int cmpdata_locked           = 0;
	off_t remaining              = 0;
	uint64_t uncompressed_size   = 0;

	/*
	 * Trace the following parameters on entry with event-id 0x03120000.
	 *
	 * @vp->v_id:		vnode-id of the file being decompressed.
	 * @toSize:		uncompress given bytes of the file.
	 * @truncate_okay:	on error it is OK to truncate.
	 * @skiplock:		compressed data is locked, skip locking again.
	 *
	 * Please NOTE: @toSize can overflow in theory but here it is safe.
	 */
	DECMPFS_EMIT_TRACE_ENTRY(DECMPDBG_DECOMPRESS_FILE, vp->v_id,
	    (int)toSize, truncate_okay, skiplock);

	if (!skiplock) {
		decmpfs_lock_compressed_data(cp, 1); cmpdata_locked = 1;
	}

	/* re-entry point: taken again after refreshing an unknown state */
decompress:
	old_state = decmpfs_fast_get_state(cp);

	switch (old_state) {
	case FILE_IS_NOT_COMPRESSED:
	{
		/* someone else decompressed the file */
		err = 0;
		goto out;
	}

	case FILE_TYPE_UNKNOWN:
	{
		/* the file is in an unknown state, so update the state and retry */
		(void)decmpfs_file_is_compressed(vp, cp);

		/* try again */
		goto decompress;
	}

	case FILE_IS_COMPRESSED:
	{
		/* the file is compressed, so decompress it */
		break;
	}

	default:
	{
		/*
		 *  this shouldn't happen since multiple calls to decmpfs_decompress_file lock each other out,
		 *  and when decmpfs_decompress_file returns, the state should be always be set back to
		 *  FILE_IS_NOT_COMPRESSED or FILE_TYPE_UNKNOWN
		 */
		err = EINVAL;
		goto out;
	}
	}

	err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0, &hdr_size);
	if (err != 0) {
		goto out;
	}

	uncompressed_size = hdr->uncompressed_size;
	if (toSize == -1) {
		/* -1 means decompress the whole file */
		toSize = hdr->uncompressed_size;
	}

	if (toSize == 0) {
		/* special case truncating the file to zero bytes */
		goto nodecmp;
	} else if ((uint64_t)toSize > hdr->uncompressed_size) {
		/* the caller is trying to grow the file, so we should decompress all the data */
		toSize = hdr->uncompressed_size;
	}

	/* copy in chunks of at most 64 KiB */
	allocSize = MIN(64 * 1024, (size_t)toSize);
	data = (char *)kalloc_data(allocSize, Z_WAITOK);
	if (!data) {
		err = ENOMEM;
		goto out;
	}

	uio_w = uio_create(1, 0LL, UIO_SYSSPACE, UIO_WRITE);
	if (!uio_w) {
		err = ENOMEM;
		goto out;
	}
	/* flag the uio so lower layers treat this as a compressed-file write */
	uio_w->uio_flags |= UIO_FLAGS_IS_COMPRESSED_FILE;

	remaining = toSize;

	/* tell the buffer cache that this is an empty file */
	ubc_setsize(vp, 0);

	/* if we got here, we need to decompress the file */
	decmpfs_cnode_set_vnode_state(cp, FILE_IS_CONVERTING, 1);

	while (remaining > 0) {
		/* loop decompressing data from the file and writing it into the data fork */

		uint64_t bytes_read = 0;
		decmpfs_vector vec = { .buf = data, .size = (user_ssize_t)MIN(allocSize, remaining) };
		err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, offset, vec.size, 1, &vec, &bytes_read);
		if (err != 0) {
			ErrorLogWithPath("decmpfs_fetch_uncompressed_data err %d\n", err);
			goto out;
		}

		if (bytes_read == 0) {
			/* we're done reading data */
			break;
		}

		uio_reset(uio_w, offset, UIO_SYSSPACE, UIO_WRITE);
		err = uio_addiov(uio_w, CAST_USER_ADDR_T(data), (user_size_t)bytes_read);
		if (err != 0) {
			ErrorLogWithPath("uio_addiov err %d\n", err);
			err = ENOMEM;
			goto out;
		}

		err = VNOP_WRITE(vp, uio_w, 0, decmpfs_ctx);
		if (err != 0) {
			/* if the write failed, truncate the file to zero bytes */
			ErrorLogWithPath("VNOP_WRITE err %d\n", err);
			break;
		}
		offset += bytes_read;
		remaining -= bytes_read;
	}

	if (err == 0) {
		if (offset != toSize) {
			/* the decompressor produced fewer bytes than the header promised */
			ErrorLogWithPath("file decompressed to %lld instead of %lld\n", offset, toSize);
			err = EINVAL;
			goto out;
		}
	}

	if (err == 0) {
		/* sync the data and metadata */
		err = VNOP_FSYNC(vp, MNT_WAIT, decmpfs_ctx);
		if (err != 0) {
			ErrorLogWithPath("VNOP_FSYNC err %d\n", err);
			goto out;
		}
	}

	if (err != 0) {
		/* write, setattr, or fsync failed */
		ErrorLogWithPath("aborting decompress, err %d\n", err);
		if (truncate_okay) {
			/* truncate anything we might have written */
			int error = vnode_setsize(vp, 0, 0, decmpfs_ctx);
			ErrorLogWithPath("vnode_setsize err %d\n", error);
		}
		goto out;
	}

nodecmp:
	/* if we're truncating the file to zero bytes, we'll skip ahead to here */

	/* unset the compressed flag */
	unset_compressed_flag(vp);

	/* free the compressed data associated with this file */
	err = decmpfs_free_compressed_data(vp, cp);
	if (err != 0) {
		ErrorLogWithPath("decmpfs_free_compressed_data err %d\n", err);
	}

	/*
	 *  even if free_compressed_data or vnode_getattr/vnode_setattr failed, return success
	 *  since we succeeded in writing all of the file data to the data fork
	 */
	err = 0;

	/* if we got this far, the file was successfully decompressed */
	update_file_state = 1;
	new_state = FILE_IS_NOT_COMPRESSED;

#if COMPRESSION_DEBUG
	{
		uint64_t filesize = 0;
		vnsize(vp, &filesize);
		DebugLogWithPath("new file size %lld\n", filesize);
	}
#endif

out:
	if (hdr != NULL) {
		kfree_data(hdr, hdr_size);
	}
	kfree_data(data, allocSize);

	if (uio_w) {
		uio_free(uio_w);
	}

	if (err != 0) {
		/* if there was a failure, reset compression flags to unknown and clear the buffer cache data */
		update_file_state = 1;
		new_state = FILE_TYPE_UNKNOWN;
		if (uncompressed_size) {
			/* shrink-then-grow to invalidate any partially written cached pages */
			ubc_setsize(vp, 0);
			ubc_setsize(vp, uncompressed_size);
		}
	}

	if (update_file_state) {
		lck_mtx_lock(&decompress_channel_mtx);
		decmpfs_cnode_set_vnode_state(cp, new_state, 1);
		wakeup((caddr_t)&decompress_channel); /* wake up anyone who might have been waiting for decompression */
		lck_mtx_unlock(&decompress_channel_mtx);
	}

	if (cmpdata_locked) {
		decmpfs_unlock_compressed_data(cp, 1);
	}
	/*
	 * Trace the following parameters on return with event-id 0x03120000.
	 *
	 * @vp->v_id:	vnode-id of the file being decompressed.
	 * @err:	value returned from this function.
	 */
	DECMPFS_EMIT_TRACE_RETURN(DECMPDBG_DECOMPRESS_FILE, vp->v_id, err);
	return err;
}
2254 
2255 #pragma mark --- Type1 compressor ---
2256 
2257 /*
2258  *  The "Type1" compressor stores the data fork directly in the compression xattr
2259  */
2260 
2261 static int
2262 decmpfs_validate_compressed_file_Type1(__unused vnode_t vp, __unused vfs_context_t ctx, decmpfs_header *hdr)
2263 {
2264 	int err          = 0;
2265 
2266 	if (hdr->uncompressed_size + sizeof(decmpfs_disk_header) != (uint64_t)hdr->attr_size) {
2267 		err = EINVAL;
2268 		goto out;
2269 	}
2270 out:
2271 	return err;
2272 }
2273 
2274 static int
2275 decmpfs_fetch_uncompressed_data_Type1(__unused vnode_t vp, __unused vfs_context_t ctx, decmpfs_header *hdr, off_t offset, user_ssize_t size, int nvec, decmpfs_vector *vec, uint64_t *bytes_read)
2276 {
2277 	int err          = 0;
2278 	int i;
2279 	user_ssize_t remaining;
2280 
2281 	if (hdr->uncompressed_size + sizeof(decmpfs_disk_header) != (uint64_t)hdr->attr_size) {
2282 		err = EINVAL;
2283 		goto out;
2284 	}
2285 
2286 #if COMPRESSION_DEBUG
2287 	static int dummy = 0; // prevent syslog from coalescing printfs
2288 	DebugLogWithPath("%d memcpy %lld at %lld\n", dummy++, size, (uint64_t)offset);
2289 #endif
2290 
2291 	remaining = size;
2292 	for (i = 0; (i < nvec) && (remaining > 0); i++) {
2293 		user_ssize_t curCopy = vec[i].size;
2294 		if (curCopy > remaining) {
2295 			curCopy = remaining;
2296 		}
2297 		memcpy(vec[i].buf, hdr->attr_bytes + offset, curCopy);
2298 		offset += curCopy;
2299 		remaining -= curCopy;
2300 	}
2301 
2302 	if ((bytes_read) && (err == 0)) {
2303 		*bytes_read = (size - remaining);
2304 	}
2305 
2306 out:
2307 	return err;
2308 }
2309 
/*
 * Registration record for the built-in Type1 compressor, whose data fork is
 * stored verbatim inside the compression xattr (no transform applied), so
 * it needs neither a fetch adjustment nor a free callback.
 */
SECURITY_READ_ONLY_EARLY(static decmpfs_registration) Type1Reg =
{
	.decmpfs_registration = DECMPFS_REGISTRATION_VERSION,
	.validate          = decmpfs_validate_compressed_file_Type1,
	.adjust_fetch      = NULL,/* no adjust necessary */
	.fetch             = decmpfs_fetch_uncompressed_data_Type1,
	.free_data         = NULL,/* no free necessary */
	.get_flags         = NULL/* no flags */
};
2319 
2320 #pragma mark --- decmpfs initialization ---
2321 
2322 void
2323 decmpfs_init(void)
2324 {
2325 	static int done = 0;
2326 	if (done) {
2327 		return;
2328 	}
2329 
2330 	decmpfs_ctx = vfs_context_create(vfs_context_kernel());
2331 
2332 	register_decmpfs_decompressor(CMP_Type1, &Type1Reg);
2333 
2334 	done = 1;
2335 }
2336 #endif /* FS_COMPRESSION */
2337