xref: /xnu-11215.41.3/libkern/kxld/WKdmCompress.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 #include "WKdm.h"
2 
3 /***********************************************************************
4  *                   THE PACKING ROUTINES
5  */
6 
7 /* WK_pack_2bits()
8  * Pack some multiple of four words holding two-bit tags (in the low
9  * two bits of each byte) into an integral number of words, i.e.,
10  * one fourth as many.
11  * NOTE: Pad the input out with zeroes to a multiple of four words!
12  */
13 static WK_word*
WK_pack_2bits(WK_word * source_buf,WK_word * source_end,WK_word * dest_buf)14 WK_pack_2bits(WK_word* source_buf,
15     WK_word* source_end,
16     WK_word* dest_buf)
17 {
18 	WK_word* src_next = source_buf;
19 	WK_word* dest_next = dest_buf;
20 
21 	while (src_next < source_end) {
22 		WK_word temp = src_next[0];
23 		temp |= (src_next[1] << 2);
24 		temp |= (src_next[2] << 4);
25 		temp |= (src_next[3] << 6);
26 
27 		dest_next[0] = temp;
28 		dest_next++;
29 		src_next += 4;
30 	}
31 
32 	return dest_next;
33 }
34 
35 /* WK_pack_4bits()
36  * Pack an even number of words holding 4-bit patterns in the low bits
37  * of each byte into half as many words.
38  * note: pad out the input with zeroes to an even number of words!
39  */
40 
41 static WK_word*
WK_pack_4bits(WK_word * source_buf,WK_word * source_end,WK_word * dest_buf)42 WK_pack_4bits(WK_word* source_buf,
43     WK_word* source_end,
44     WK_word* dest_buf)
45 {
46 	WK_word* src_next = source_buf;
47 	WK_word* dest_next = dest_buf;
48 
49 	/* this loop should probably be unrolled */
50 	while (src_next < source_end) {
51 		WK_word temp = src_next[0];
52 		temp |= (src_next[1] << 4);
53 
54 		dest_next[0] = temp;
55 		dest_next++;
56 		src_next += 2;
57 	}
58 
59 	return dest_next;
60 }
61 
62 /* pack_3_tenbits()
63  * Pack a sequence of three ten bit items into one word.
64  * note: pad out the input with zeroes to an even number of words!
65  */
66 static WK_word*
WK_pack_3_tenbits(WK_word * source_buf,WK_word * source_end,WK_word * dest_buf)67 WK_pack_3_tenbits(WK_word* source_buf,
68     WK_word* source_end,
69     WK_word* dest_buf)
70 {
71 	WK_word* src_next = source_buf;
72 	WK_word* dest_next = dest_buf;
73 
74 	/* this loop should probably be unrolled */
75 	while (src_next < source_end) {
76 		WK_word temp = src_next[0];
77 		temp |= (src_next[1] << 10);
78 		temp |= (src_next[2] << 20);
79 
80 		dest_next[0] = temp;
81 		dest_next++;
82 		src_next += 3;
83 	}
84 
85 	return dest_next;
86 }
87 
88 /***************************************************************************
89  *  WKdm_compress()---THE COMPRESSOR
90  */
91 
92 unsigned int
WKdm_compress(WK_word * src_buf,WK_word * dest_buf,unsigned int num_input_words)93 WKdm_compress(WK_word* src_buf,
94     WK_word* dest_buf,
95     unsigned int num_input_words)
96 {
97 	DictionaryElement dictionary[DICTIONARY_SIZE];
98 
99 	/* arrays that hold output data in intermediate form during modeling */
100 	/* and whose contents are packed into the actual output after modeling */
101 
102 	/* sizes of these arrays should be increased if you want to compress
103 	 * pages larger than 4KB
104 	 */
105 	WK_word tempTagsArray[300];   /* tags for everything          */
106 	WK_word tempQPosArray[300];   /* queue positions for matches  */
107 	WK_word tempLowBitsArray[1200]; /* low bits for partial matches */
108 
109 	/* boundary_tmp will be used for keeping track of what's where in
110 	 * the compressed page during packing
111 	 */
112 	WK_word* boundary_tmp;
113 
114 	/* Fill pointers for filling intermediate arrays (of queue positions
115 	 * and low bits) during encoding.
116 	 * Full words go straight to the destination buffer area reserved
117 	 * for them.  (Right after where the tags go.)
118 	 */
119 	WK_word* next_full_patt;
120 	char* next_tag = (char *) tempTagsArray;
121 	char* next_qp = (char *) tempQPosArray;
122 	WK_word* next_low_bits = tempLowBitsArray;
123 
124 	WK_word* next_input_word = src_buf;
125 	WK_word* end_of_input = src_buf + num_input_words;
126 
127 	PRELOAD_DICTIONARY;
128 
129 	next_full_patt = dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16);
130 
131 #ifdef WK_DEBUG
132 	printf("\nIn WKdm_compress\n");
133 	printf("About to actually compress, src_buf is %u\n", src_buf);
134 	printf("dictionary is at %u\n", dictionary);
135 	printf("dest_buf is %u next_full_patt is %u\n", dest_buf, next_full_patt);
136 	fflush(stdout);
137 #endif
138 
139 	while (next_input_word < end_of_input) {
140 		WK_word *dict_location;
141 		WK_word dict_word;
142 		WK_word input_word = *next_input_word;
143 
144 		/* compute hash value, which is a byte offset into the dictionary,
145 		 * and add it to the base address of the dictionary. Cast back and
146 		 * forth to/from char * so no shifts are needed
147 		 */
148 		dict_location =
149 		    (WK_word *)
150 		    ((void*) (((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word)));
151 
152 		dict_word = *dict_location;
153 
154 		if (input_word == dict_word) {
155 			RECORD_EXACT(dict_location - dictionary);
156 		} else if (input_word == 0) {
157 			RECORD_ZERO;
158 		} else {
159 			WK_word input_high_bits = HIGH_BITS(input_word);
160 			if (input_high_bits == HIGH_BITS(dict_word)) {
161 				RECORD_PARTIAL(dict_location - dictionary, LOW_BITS(input_word));
162 				*dict_location = input_word;
163 			} else {
164 				RECORD_MISS(input_word);
165 				*dict_location = input_word;
166 			}
167 		}
168 		next_input_word++;
169 	}
170 
171 #ifdef WK_DEBUG
172 	printf("AFTER MODELING in WKdm_compress()\n"); fflush(stdout);
173 	printf("tempTagsArray holds %u bytes\n",
174 	    next_tag - (char *) tempTagsArray);
175 	printf("tempQPosArray holds %u bytes\n",
176 	    next_qp - (char *) tempQPosArray);
177 	printf("tempLowBitsArray holds %u bytes\n",
178 	    (char *) next_low_bits - (char *) tempLowBitsArray);
179 
180 	printf("next_full_patt is %p\n",
181 	    next_full_patt);
182 
183 	printf(" i.e., there are %u full patterns\n",
184 	    next_full_patt - (dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16)));
185 	fflush(stdout);
186 
187 	{ int i;
188 	  WK_word *arr = (dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16));
189 
190 	  printf("  first 20 full patterns are: \n");
191 	  for (i = 0; i < 20; i++) {
192 		  printf(" %d", arr[i]);
193 	  }
194 	  printf("\n");}
195 #endif
196 
197 	/* Record (into the header) where we stopped writing full words,
198 	 * which is where we will pack the queue positions.  (Recall
199 	 * that we wrote the full words directly into the dest buffer
200 	 * during modeling.
201 	 */
202 
203 	SET_QPOS_AREA_START(dest_buf, next_full_patt);
204 
205 	/* Pack the tags into the tags area, between the page header
206 	 * and the full words area.  We don't pad for the packer
207 	 * because we assume that the page size is a multiple of 16.
208 	 */
209 
210 #ifdef WK_DEBUG
211 	printf("about to pack %u bytes holding tags\n",
212 	    next_tag - (char *) tempTagsArray);
213 
214 	{ int i;
215 	  char* arr = (char *) tempTagsArray;
216 
217 	  printf("  first 200 tags are: \n");
218 	  for (i = 0; i < 200; i++) {
219 		  printf(" %d", arr[i]);
220 	  }
221 	  printf("\n");}
222 #endif
223 
224 	boundary_tmp = WK_pack_2bits(tempTagsArray,
225 	    (WK_word *) ((void *) next_tag),
226 	    dest_buf + HEADER_SIZE_IN_WORDS);
227 
228 #ifdef WK_DEBUG
229 	printf("packing tags stopped at %u\n", boundary_tmp);
230 #endif
231 
232 	/* Pack the queue positions into the area just after
233 	 * the full words.  We have to round up the source
234 	 * region to a multiple of two words.
235 	 */
236 
237 	{
238 		unsigned int num_bytes_to_pack = (unsigned int)(next_qp - (char *) tempQPosArray);
239 		unsigned int num_packed_words = (num_bytes_to_pack + 7) >> 3; // ceil((double) num_bytes_to_pack / 8);
240 		unsigned int num_source_words = num_packed_words * 2;
241 		WK_word* endQPosArray = tempQPosArray + num_source_words;
242 
243 		/* Pad out the array with zeros to avoid corrupting real packed
244 		 *  values. */
245 		for (; /* next_qp is already set as desired */
246 		    next_qp < (char*)endQPosArray;
247 		    next_qp++) {
248 			*next_qp = 0;
249 		}
250 
251 #ifdef WK_DEBUG
252 		printf("about to pack %u (bytes holding) queue posns.\n",
253 		    num_bytes_to_pack);
254 		printf("packing them from %u words into %u words\n",
255 		    num_source_words, num_packed_words);
256 		printf("dest is range %u to %u\n",
257 		    next_full_patt, next_full_patt + num_packed_words);
258 		{ int i;
259 		  char *arr = (char *) tempQPosArray;
260 		  printf("  first 200 queue positions are: \n");
261 		  for (i = 0; i < 200; i++) {
262 			  printf(" %d", arr[i]);
263 		  }
264 		  printf("\n");}
265 #endif
266 
267 		boundary_tmp = WK_pack_4bits(tempQPosArray,
268 		    endQPosArray,
269 		    next_full_patt);
270 #ifdef WK_DEBUG
271 		printf("Packing of queue positions stopped at %u\n", boundary_tmp);
272 #endif // WK_DEBUG
273 
274 		/* Record (into the header) where we stopped packing queue positions,
275 		 * which is where we will start packing low bits.
276 		 */
277 		SET_LOW_BITS_AREA_START(dest_buf, boundary_tmp);
278 	}
279 
280 	/* Pack the low bit patterns into the area just after
281 	 * the queue positions.  We have to round up the source
282 	 * region to a multiple of three words.
283 	 */
284 
285 	{
286 		unsigned int num_tenbits_to_pack =
287 		    (unsigned int)(next_low_bits - tempLowBitsArray);
288 		unsigned int num_packed_words = (num_tenbits_to_pack + 2) / 3; //ceil((double) num_tenbits_to_pack / 3);
289 		unsigned int num_source_words = num_packed_words * 3;
290 		WK_word* endLowBitsArray = tempLowBitsArray + num_source_words;
291 
292 		/* Pad out the array with zeros to avoid corrupting real packed
293 		 *  values. */
294 
295 		for (; /* next_low_bits is already set as desired */
296 		    next_low_bits < endLowBitsArray;
297 		    next_low_bits++) {
298 			*next_low_bits = 0;
299 		}
300 
301 #ifdef WK_DEBUG
302 		printf("about to pack low bits\n");
303 		printf("num_tenbits_to_pack is %u\n", num_tenbits_to_pack);
304 		printf("endLowBitsArray is %u\n", endLowBitsArray);
305 #endif
306 
307 		boundary_tmp = WK_pack_3_tenbits(tempLowBitsArray,
308 		    endLowBitsArray,
309 		    boundary_tmp);
310 
311 		SET_LOW_BITS_AREA_END(dest_buf, boundary_tmp);
312 	}
313 
314 	return (unsigned int)((char *) boundary_tmp - (char *) dest_buf);
315 }
316