xref: /xnu-8796.101.5/osfmk/vm/vm_compressor_algorithms.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2010-2016 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* This module implements a hybrid/adaptive compression scheme, using WKdm where
29  * profitable and, currently, an LZ4 variant elsewhere.
30  * (Created 2016, Derek Kumar)
31  */
32 #include "lz4.h"
33 #include "WKdm_new.h"
34 #include <vm/vm_compressor_algorithms.h>
35 #include <vm/vm_compressor.h>
36 
37 #define MZV_MAGIC (17185)
38 #if defined(__arm64__)
39 #include <arm64/proc_reg.h>
40 #endif
41 
/*
 * Alignment, in bytes, applied to the LZ4 and WKdm scratch members of
 * compressor_encode_scratch_t.  (Each macro used to be defined twice with
 * the same value; the redundant copies have been dropped.)
 */
#define LZ4_SCRATCH_ALIGN (64)
#define WKC_SCRATCH_ALIGN (64)
47 
48 typedef union {
49 	uint8_t lz4state[lz4_encode_scratch_size]__attribute((aligned(LZ4_SCRATCH_ALIGN)));
50 	uint8_t wkscratch[0] __attribute((aligned(WKC_SCRATCH_ALIGN))); // TODO
51 } compressor_encode_scratch_t;
52 
53 typedef union {
54 	uint8_t lz4decodestate[lz4_encode_scratch_size]__attribute((aligned(64)));
55 	uint8_t wkdecompscratch[0] __attribute((aligned(64)));
56 } compressor_decode_scratch_t;
57 
/*
 * Running state of the hybrid (WKdm/LZ4) codec selector.  The fields are
 * read and updated by compressor_preselect() and
 * compressor_selector_update().
 */
typedef struct {
	/* Not referenced by the code visible in this file. */
	uint16_t lz4_selection_run;
	/* Current run of LZ4 results that favoured LZ4; reset to 0 otherwise. */
	uint16_t lz4_run_length;
	/* LZ4 preselections since the counter was last reset. */
	uint16_t lz4_preselects;
	/* Total number of LZ4 preselections. */
	uint32_t lz4_total_preselects;
	/* Pages for which LZ4 was skipped since the last reset. */
	uint16_t lz4_failure_skips;
	/* Total number of pages for which LZ4 was skipped. */
	uint32_t lz4_total_failure_skips;
	/* Current run of failed, unprofitable or negative LZ4 results. */
	uint16_t lz4_failure_run_length;
	/*
	 * Total LZ4 results whose gain over WKdm was at or below
	 * lz4_profitable_bytes.  Widened from uint16_t so that it no longer
	 * wraps at 65535 and matches the other lz4_total_* counters.
	 */
	uint32_t lz4_total_unprofitables;
	/* Total LZ4 results that were not smaller than the WKdm result. */
	uint32_t lz4_total_negatives;
	/* Total LZ4 encodes that produced no output (size 0). */
	uint32_t lz4_total_failures;
} compressor_state_t;
70 
71 compressor_tuneables_t vmctune = {
72 	.lz4_threshold = 2048,
73 	.wkdm_reeval_threshold = 1536,
74 	.lz4_max_failure_skips = 0,
75 	.lz4_max_failure_run_length = ~0U,
76 	.lz4_max_preselects = 0,
77 	.lz4_run_preselection_threshold = ~0U,
78 	.lz4_run_continue_bytes = 0,
79 	.lz4_profitable_bytes = 0,
80 };
81 
82 compressor_state_t vmcstate = {
83 	.lz4_selection_run = 0,
84 	.lz4_run_length = 0,
85 	.lz4_preselects = 0,
86 	.lz4_total_preselects = 0,
87 	.lz4_failure_skips = 0,
88 	.lz4_total_failure_skips = 0,
89 	.lz4_failure_run_length = 0,
90 	.lz4_total_unprofitables = 0,
91 	.lz4_total_negatives = 0,
92 };
93 
94 compressor_stats_t compressor_stats;
95 
/* Result of compressor_preselect(), consumed by metacompressor() in hybrid mode. */
enum compressor_preselect_t {
	CPRESELLZ4 = 0, /* go straight to LZ4, WKdm is not run */
	CSKIPLZ4   = 1, /* run WKdm only, LZ4 is not evaluated */
	CPRESELWK  = 2, /* run WKdm first, LZ4 may follow */
};
101 
102 vm_compressor_mode_t vm_compressor_current_codec = VM_COMPRESSOR_DEFAULT_CODEC;
103 
104 boolean_t vm_compressor_force_sw_wkdm = FALSE;
105 
106 boolean_t verbose = FALSE;
107 
/*
 * Debug-only statistics.  VM_COMPRESSOR_STAT_DBG() evaluates its argument
 * when VMDBGSTAT is non-zero and expands to an empty statement otherwise.
 *
 * The guard used to test the misspelled, never-defined VMDBGSTATS, so the
 * enabled variant could not be selected even on DEBUG builds.
 */
#define VMDBGSTAT (DEBUG)
#if VMDBGSTAT
#define VM_COMPRESSOR_STAT_DBG(x...)                                    \
	do {                                                            \
	        (x);                                                    \
	} while(0)
#else
#define VM_COMPRESSOR_STAT_DBG(x...)                                    \
	do {                                                            \
	} while (0)
#endif
119 
/*
 * Statistics macros.  On DEVELOPMENT or DEBUG builds the argument
 * expression is evaluated; on other builds both macros expand to an empty
 * statement and the argument is discarded.
 */
#define VMCSTATS (DEVELOPMENT || DEBUG)
#if VMCSTATS
#define VM_COMPRESSOR_STAT(...)                                         \
	do {                                                            \
	        (__VA_ARGS__);                                          \
	} while (0)
/* TODO make atomic where needed, decompression paths */
#define VM_DECOMPRESSOR_STAT(...)                                       \
	do {                                                            \
	        (__VA_ARGS__);                                          \
	} while (0)
#else /* !VMCSTATS */
#define VM_COMPRESSOR_STAT(...)                                         \
	do {                                                            \
	} while (0)
#define VM_DECOMPRESSOR_STAT(...)                                       \
	do {                                                            \
	} while (0)
#endif /* VMCSTATS */
139 
140 static inline enum compressor_preselect_t
compressor_preselect(void)141 compressor_preselect(void)
142 {
143 	if (vmcstate.lz4_failure_skips >= vmctune.lz4_max_failure_skips) {
144 		vmcstate.lz4_failure_skips = 0;
145 		vmcstate.lz4_failure_run_length = 0;
146 	}
147 
148 	if (vmcstate.lz4_failure_run_length >= vmctune.lz4_max_failure_run_length) {
149 		vmcstate.lz4_failure_skips++;
150 		vmcstate.lz4_total_failure_skips++;
151 		return CSKIPLZ4;
152 	}
153 
154 	if (vmcstate.lz4_preselects >= vmctune.lz4_max_preselects) {
155 		vmcstate.lz4_preselects = 0;
156 		return CPRESELWK;
157 	}
158 
159 	if (vmcstate.lz4_run_length >= vmctune.lz4_run_preselection_threshold) {
160 		vmcstate.lz4_preselects++;
161 		vmcstate.lz4_total_preselects++;
162 		return CPRESELLZ4;
163 	}
164 	return CPRESELWK;
165 }
166 
167 static inline void
compressor_selector_update(int lz4sz,int didwk,int wksz)168 compressor_selector_update(int lz4sz, int didwk, int wksz)
169 {
170 	VM_COMPRESSOR_STAT(compressor_stats.lz4_compressions++);
171 
172 	if (lz4sz == 0) {
173 		VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += PAGE_SIZE);
174 		VM_COMPRESSOR_STAT(compressor_stats.lz4_compression_failures++);
175 		vmcstate.lz4_failure_run_length++;
176 		VM_COMPRESSOR_STAT(vmcstate.lz4_total_failures++);
177 		vmcstate.lz4_run_length = 0;
178 	} else {
179 		vmcstate.lz4_failure_run_length = 0;
180 
181 		VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += lz4sz);
182 
183 		if (lz4sz <= vmctune.wkdm_reeval_threshold) {
184 			vmcstate.lz4_run_length = 0;
185 		} else {
186 			if (!didwk) {
187 				vmcstate.lz4_run_length++;
188 			}
189 		}
190 
191 		if (didwk) {
192 			if (__probable(wksz > lz4sz)) {
193 				uint32_t lz4delta = wksz - lz4sz;
194 				VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_delta += lz4delta);
195 				if (lz4delta >= vmctune.lz4_run_continue_bytes) {
196 					vmcstate.lz4_run_length++;
197 				} else if (lz4delta <= vmctune.lz4_profitable_bytes) {
198 					vmcstate.lz4_failure_run_length++;
199 					VM_COMPRESSOR_STAT(vmcstate.lz4_total_unprofitables++);
200 					vmcstate.lz4_run_length = 0;
201 				} else {
202 					vmcstate.lz4_run_length = 0;
203 				}
204 			} else {
205 				VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_negative_delta += (lz4sz - wksz));
206 				vmcstate.lz4_failure_run_length++;
207 				VM_COMPRESSOR_STAT(vmcstate.lz4_total_negatives++);
208 				vmcstate.lz4_run_length = 0;
209 			}
210 		}
211 	}
212 }
213 
214 
/*
 * Header sanity check for a WKdm-compressed buffer.
 *
 * On DEVELOPMENT or DEBUG builds: if the first word is not MZV_MAGIC and
 * any of the first three words has a bit set in its upper 16 bits, panic.
 * On other builds this is a no-op.
 */
static inline void
WKdm_hv(uint32_t *wkbuf)
{
#if DEVELOPMENT || DEBUG
	if (wkbuf[0] == MZV_MAGIC) {
		return;
	}
	if ((wkbuf[0] | wkbuf[1] | wkbuf[2]) & 0xFFFF0000) {
		panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x", wkbuf, wkbuf[0], wkbuf[1], wkbuf[2]);
	}
#else /* !(DEVELOPMENT || DEBUG) */
	(void) wkbuf;
#endif /* DEVELOPMENT || DEBUG */
}
229 
230 //todo fix clang diagnostic
231 #pragma clang diagnostic push
232 #pragma clang diagnostic ignored "-Wincompatible-pointer-types"
233 
234 #if defined(__arm64__)
235 #endif
236 
237 static inline bool
WKdmD(WK_word * src_buf,WK_word * dest_buf,WK_word * scratch,unsigned int bytes,__unused uint32_t * pop_count)238 WKdmD(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, unsigned int bytes,
239     __unused uint32_t *pop_count)
240 {
241 #if defined(__arm64__)
242 #endif
243 	WKdm_hv(src_buf);
244 #if defined(__arm64__)
245 #ifndef __ARM_16K_PG__
246 	if (PAGE_SIZE == 4096) {
247 		WKdm_decompress_4k(src_buf, dest_buf, scratch, bytes);
248 	} else
249 #endif /* !____ARM_16K_PG__ */
250 	{
251 		__unused uint64_t wdsstart;
252 
253 		VM_COMPRESSOR_STAT_DBG(wdsstart = mach_absolute_time());
254 		WKdm_decompress_16k(src_buf, dest_buf, scratch, bytes);
255 
256 		VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_dabstime += mach_absolute_time() - wdsstart);
257 		VM_COMPRESSOR_STAT(compressor_stats.wks_decompressions++);
258 	}
259 #else /* !defined arm64 */
260 	WKdm_decompress_new(src_buf, dest_buf, scratch, bytes);
261 #endif
262 	return true;
263 }
#if DEVELOPMENT || DEBUG
/* Development-only globals; not referenced by the code visible in this file. */
int precompy;
int wkswhw;
#endif /* DEVELOPMENT || DEBUG */
267 
268 static inline int
WKdmC(WK_word * src_buf,WK_word * dest_buf,WK_word * scratch,boolean_t * incomp_copy,unsigned int limit,__unused uint32_t * pop_count)269 WKdmC(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch,
270     boolean_t *incomp_copy, unsigned int limit, __unused uint32_t *pop_count)
271 {
272 	(void)incomp_copy;
273 	int wkcval;
274 #if defined(__arm64__)
275 #ifndef __ARM_16K_PG__
276 	if (PAGE_SIZE == 4096) {
277 		wkcval = WKdm_compress_4k(src_buf, dest_buf, scratch, limit);
278 	} else
279 #endif /* !____ARM_16K_PG__ */
280 	{
281 		__unused uint64_t wcswstart;
282 
283 		VM_COMPRESSOR_STAT_DBG(wcswstart = mach_absolute_time());
284 
285 		int wkswsz = WKdm_compress_16k(src_buf, dest_buf, scratch, limit);
286 
287 		VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_cabstime += mach_absolute_time() - wcswstart);
288 		VM_COMPRESSOR_STAT(compressor_stats.wks_compressions++);
289 		wkcval = wkswsz;
290 	}
291 #else
292 	wkcval = WKdm_compress_new(src_buf, dest_buf, scratch, limit);
293 #endif
294 	return wkcval;
295 }
296 
297 
298 int
metacompressor(const uint8_t * in,uint8_t * cdst,int32_t outbufsz,uint16_t * codec,void * cscratchin,boolean_t * incomp_copy,uint32_t * pop_count_p)299 metacompressor(const uint8_t *in, uint8_t *cdst, int32_t outbufsz, uint16_t *codec,
300     void *cscratchin, boolean_t *incomp_copy, uint32_t *pop_count_p)
301 {
302 	int sz = -1;
303 	int dowk = FALSE, dolz4 = FALSE, skiplz4 = FALSE;
304 	int insize = PAGE_SIZE;
305 	compressor_encode_scratch_t *cscratch = cscratchin;
306 	/* Not all paths lead to an inline population count. */
307 	uint32_t pop_count = C_SLOT_NO_POPCOUNT;
308 
309 	if (vm_compressor_current_codec == CMODE_WK) {
310 		dowk = TRUE;
311 	} else if (vm_compressor_current_codec == CMODE_LZ4) {
312 		dolz4 = TRUE;
313 	} else if (vm_compressor_current_codec == CMODE_HYB) {
314 		enum compressor_preselect_t presel = compressor_preselect();
315 		if (presel == CPRESELLZ4) {
316 			dolz4 = TRUE;
317 			goto lz4compress;
318 		} else if (presel == CSKIPLZ4) {
319 			dowk = TRUE;
320 			skiplz4 = TRUE;
321 		} else {
322 			assert(presel == CPRESELWK);
323 			dowk = TRUE;
324 		}
325 	}
326 
327 	if (dowk) {
328 		*codec = CCWK;
329 		VM_COMPRESSOR_STAT(compressor_stats.wk_compressions++);
330 		sz = WKdmC(in, cdst, &cscratch->wkscratch[0], incomp_copy, outbufsz, &pop_count);
331 
332 		if (sz == -1) {
333 			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += PAGE_SIZE);
334 			VM_COMPRESSOR_STAT(compressor_stats.wk_compression_failures++);
335 
336 			if (vm_compressor_current_codec == CMODE_HYB) {
337 				goto lz4eval;
338 			}
339 			goto cexit;
340 		} else if (sz == 0) {
341 			VM_COMPRESSOR_STAT(compressor_stats.wk_sv_compressions++);
342 			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += 4);
343 		} else {
344 			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += sz);
345 		}
346 	}
347 lz4eval:
348 	if (vm_compressor_current_codec == CMODE_HYB) {
349 		if (((sz == -1) || (sz >= vmctune.lz4_threshold)) && (skiplz4 == FALSE)) {
350 			dolz4 = TRUE;
351 		} else {
352 #if DEVELOPMENT || DEBUG
353 			int wkc = (sz == -1) ? PAGE_SIZE : sz;
354 #endif
355 			VM_COMPRESSOR_STAT(compressor_stats.wk_compressions_exclusive++);
356 			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_exclusive += wkc);
357 			goto cexit;
358 		}
359 	}
360 
361 lz4compress:
362 
363 	if (dolz4) {
364 		if (sz == -1) {
365 			sz = PAGE_SIZE;
366 		}
367 		int wksz = sz;
368 		*codec = CCLZ4;
369 
370 		sz = (int) lz4raw_encode_buffer(cdst, outbufsz, in, insize, &cscratch->lz4state[0]);
371 
372 		compressor_selector_update(sz, dowk, wksz);
373 		if (sz == 0) {
374 			sz = -1;
375 			goto cexit;
376 		}
377 	}
378 cexit:
379 	assert(pop_count_p != NULL);
380 	*pop_count_p = pop_count;
381 	return sz;
382 }
383 
384 bool
metadecompressor(const uint8_t * source,uint8_t * dest,uint32_t csize,uint16_t ccodec,void * compressor_dscratchin,uint32_t * pop_count_p)385 metadecompressor(const uint8_t *source, uint8_t *dest, uint32_t csize,
386     uint16_t ccodec, void *compressor_dscratchin, uint32_t *pop_count_p)
387 {
388 	int dolz4 = (ccodec == CCLZ4);
389 	int rval;
390 	compressor_decode_scratch_t *compressor_dscratch = compressor_dscratchin;
391 	/* Not all paths lead to an inline population count. */
392 	uint32_t pop_count = C_SLOT_NO_POPCOUNT;
393 	bool success;
394 
395 	if (dolz4) {
396 		rval = (int)lz4raw_decode_buffer(dest, PAGE_SIZE, source, csize, &compressor_dscratch->lz4decodestate[0]);
397 		VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressions += 1);
398 		VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressed_bytes += csize);
399 #if DEVELOPMENT || DEBUG
400 		uint32_t *d32 = dest;
401 #endif
402 		assertf(rval == PAGE_SIZE, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x",
403 		    rval, *d32, *(d32 + 1), *(d32 + 2));
404 		success = (rval == PAGE_SIZE);
405 	} else {
406 		assert(ccodec == CCWK);
407 
408 		success = WKdmD(source, dest, &compressor_dscratch->wkdecompscratch[0], csize, &pop_count);
409 
410 		VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressions += 1);
411 		VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressed_bytes += csize);
412 	}
413 
414 	assert(pop_count_p != NULL);
415 	*pop_count_p = pop_count;
416 	return success;
417 }
418 #pragma clang diagnostic pop
419 
420 uint32_t
vm_compressor_get_encode_scratch_size(void)421 vm_compressor_get_encode_scratch_size(void)
422 {
423 	if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) {
424 		return MAX(sizeof(compressor_encode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL);
425 	} else {
426 		return WKdm_SCRATCH_BUF_SIZE_INTERNAL;
427 	}
428 }
429 
430 uint32_t
vm_compressor_get_decode_scratch_size(void)431 vm_compressor_get_decode_scratch_size(void)
432 {
433 	if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) {
434 		return MAX(sizeof(compressor_decode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL);
435 	} else {
436 		return WKdm_SCRATCH_BUF_SIZE_INTERNAL;
437 	}
438 }
439 
440 
441 int
vm_compressor_algorithm(void)442 vm_compressor_algorithm(void)
443 {
444 	return vm_compressor_current_codec;
445 }
446 
447 void
vm_compressor_algorithm_init(void)448 vm_compressor_algorithm_init(void)
449 {
450 	vm_compressor_mode_t new_codec = VM_COMPRESSOR_DEFAULT_CODEC;
451 
452 #if defined(__arm64__)
453 	new_codec = CMODE_HYB;
454 
455 	if (PAGE_SIZE == 16384) {
456 		vmctune.lz4_threshold = 12288;
457 	}
458 #endif
459 
460 	PE_parse_boot_argn("vm_compressor_codec", &new_codec, sizeof(new_codec));
461 	assertf(((new_codec == VM_COMPRESSOR_DEFAULT_CODEC) || (new_codec == CMODE_WK) ||
462 	    (new_codec == CMODE_LZ4) || (new_codec == CMODE_HYB)),
463 	    "Invalid VM compression codec: %u", new_codec);
464 
465 #if defined(__arm64__)
466 	uint32_t tmpc;
467 	if (PE_parse_boot_argn("-vm_compressor_wk", &tmpc, sizeof(tmpc))) {
468 		new_codec = VM_COMPRESSOR_DEFAULT_CODEC;
469 	} else if (PE_parse_boot_argn("-vm_compressor_hybrid", &tmpc, sizeof(tmpc))) {
470 		new_codec = CMODE_HYB;
471 	}
472 
473 	vm_compressor_current_codec = new_codec;
474 #endif /* arm/arm64 */
475 }
476