xref: /xnu-8796.101.5/libkern/zlib/inftrees.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2008-2016, 2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* inftrees.c -- generate Huffman trees for efficient decoding
29  * Copyright (C) 1995-2005 Mark Adler
30  * For conditions of distribution and use, see copyright notice in zlib.h
31  */
32 
33 #include "zutil.h"
34 #include "inftrees.h"
35 
36 #define MAXBITS 15
37 
38 const char inflate_copyright[] =
39    " inflate 1.2.3 Copyright 1995-2005 Mark Adler ";
40 /*
41   If you use the zlib library in a product, an acknowledgment is welcome
42   in the documentation of your product. If for some reason you cannot
43   include such an acknowledgment, I would appreciate that you keep this
44   copyright string in the executable of your product.
45  */
46 
47 /*
48    Build a set of tables to decode the provided canonical Huffman code.
49    The code lengths are lens[0..codes-1].  The result starts at *table,
50    whose indices are 0..2^bits-1.  work is a writable array of at least
51    lens shorts, which is used as a work area.  type is the type of code
52    to be generated, CODES, LENS, or DISTS.  On return, zero is success,
53    -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
54    on return points to the next available entry's address.  bits is the
55    requested root table index bits, and on return it is the actual root
56    table index bits.  It will differ if the request is greater than the
57    longest code or if it is less than the shortest code.
58  */
59 int
inflate_table(codetype type,unsigned short FAR * lens,unsigned codes,code FAR * FAR * table,unsigned FAR * bits,unsigned short FAR * work)60 inflate_table(codetype type, unsigned short FAR *lens, unsigned codes,
61 	      code FAR * FAR *table, unsigned FAR *bits,
62 	      unsigned short FAR *work)
63 {
64     unsigned len;               /* a code's length in bits */
65     unsigned sym;               /* index of code symbols */
66     unsigned min, max;          /* minimum and maximum code lengths */
67     unsigned root;              /* number of index bits for root table */
68     unsigned curr;              /* number of index bits for current table */
69     unsigned drop;              /* code bits to drop for sub-table */
70     int left;                   /* number of prefix codes available */
71     unsigned used;              /* code entries in table used */
72     unsigned huff;              /* Huffman code */
73     unsigned incr;              /* for incrementing code, index */
74     unsigned fill;              /* index for replicating entries */
75     unsigned low;               /* low bits for current root entry */
76     unsigned mask;              /* mask for low root bits */
77     code this;                  /* table entry for duplication */
78     code FAR *next;             /* next available space in table */
79     const unsigned short FAR *base;     /* base value table to use */
80     const unsigned short FAR *extra;    /* extra bits table to use */
81     unsigned match;             /* use base and extra for symbol >= match */
82     unsigned short count[MAXBITS+1];    /* number of codes of each length */
83     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
84     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
85         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
86         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
87     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
88         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
89         19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196};
90     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
91         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
92         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
93         8193, 12289, 16385, 24577, 0, 0};
94     static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
95         16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
96         23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
97         28, 28, 29, 29, 64, 64};
98 
99     /*
100        Process a set of code lengths to create a canonical Huffman code.  The
101        code lengths are lens[0..codes-1].  Each length corresponds to the
102        symbols 0..codes-1.  The Huffman code is generated by first sorting the
103        symbols by length from short to long, and retaining the symbol order
104        for codes with equal lengths.  Then the code starts with all zero bits
105        for the first code of the shortest length, and the codes are integer
106        increments for the same length, and zeros are appended as the length
107        increases.  For the deflate format, these bits are stored backwards
108        from their more natural integer increment ordering, and so when the
109        decoding tables are built in the large loop below, the integer codes
110        are incremented backwards.
111 
112        This routine assumes, but does not check, that all of the entries in
113        lens[] are in the range 0..MAXBITS.  The caller must assure this.
114        1..MAXBITS is interpreted as that code length.  zero means that that
115        symbol does not occur in this code.
116 
117        The codes are sorted by computing a count of codes for each length,
118        creating from that a table of starting indices for each length in the
119        sorted table, and then entering the symbols in order in the sorted
120        table.  The sorted table is work[], with that space being provided by
121        the caller.
122 
123        The length counts are used for other purposes as well, i.e. finding
124        the minimum and maximum length codes, determining if there are any
125        codes at all, checking for a valid set of lengths, and looking ahead
126        at length counts to determine sub-table sizes when building the
127        decoding tables.
128      */
129 
130     /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
131     for (len = 0; len <= MAXBITS; len++)
132         count[len] = 0;
133     for (sym = 0; sym < codes; sym++)
134         count[lens[sym]]++;
135 
136     /* bound code lengths, force root to be within code lengths */
137     root = *bits;
138     for (max = MAXBITS; max >= 1; max--)
139         if (count[max] != 0) break;
140     if (root > max) root = max;
141     if (max == 0) {                     /* no symbols to code at all */
142         this.op = (unsigned char)64;    /* invalid code marker */
143         this.bits = (unsigned char)1;
144         this.val = (unsigned short)0;
145         *(*table)++ = this;             /* make a table to force an error */
146         *(*table)++ = this;
147         *bits = 1;
148         return 0;     /* no symbols, but wait for decoding to report error */
149     }
150     for (min = 1; min <= MAXBITS; min++)
151         if (count[min] != 0) break;
152     if (root < min) root = min;
153 
154     /* check for an over-subscribed or incomplete set of lengths */
155     left = 1;
156     for (len = 1; len <= MAXBITS; len++) {
157         left <<= 1;
158         left -= count[len];
159         if (left < 0) return -1;        /* over-subscribed */
160     }
161     if (left > 0 && (type == CODES || max != 1))
162         return -1;                      /* incomplete set */
163 
164     /* generate offsets into symbol table for each length for sorting */
165     offs[1] = 0;
166     for (len = 1; len < MAXBITS; len++)
167         offs[len + 1] = offs[len] + count[len];
168 
169     /* sort symbols by length, by symbol order within each length */
170     for (sym = 0; sym < codes; sym++)
171         if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
172 
173     /*
174        Create and fill in decoding tables.  In this loop, the table being
175        filled is at next and has curr index bits.  The code being used is huff
176        with length len.  That code is converted to an index by dropping drop
177        bits off of the bottom.  For codes where len is less than drop + curr,
178        those top drop + curr - len bits are incremented through all values to
179        fill the table with replicated entries.
180 
181        root is the number of index bits for the root table.  When len exceeds
182        root, sub-tables are created pointed to by the root entry with an index
183        of the low root bits of huff.  This is saved in low to check for when a
184        new sub-table should be started.  drop is zero when the root table is
185        being filled, and drop is root when sub-tables are being filled.
186 
187        When a new sub-table is needed, it is necessary to look ahead in the
188        code lengths to determine what size sub-table is needed.  The length
189        counts are used for this, and so count[] is decremented as codes are
190        entered in the tables.
191 
192        used keeps track of how many table entries have been allocated from the
193        provided *table space.  It is checked when a LENS table is being made
194        against the space in *table, ENOUGH, minus the maximum space needed by
195        the worst case distance code, MAXD.  This should never happen, but the
196        sufficiency of ENOUGH has not been proven exhaustively, hence the check.
197        This assumes that when type == LENS, bits == 9.
198 
199        sym increments through all symbols, and the loop terminates when
200        all codes of length max, i.e. all codes, have been processed.  This
201        routine permits incomplete codes, so another loop after this one fills
202        in the rest of the decoding tables with invalid code markers.
203      */
204 
205     /* set up for code type */
206     switch (type) {
207     case CODES:
208         base = extra = work;    /* dummy value--not used */
209         match = 20;
210         break;
211     case LENS:
212         base = lbase;
213         extra = lext;
214         match = 257;
215         break;
216     default:            /* DISTS */
217         base = dbase;
218         extra = dext;
219         match = 0;
220     }
221 
222     /* initialize state for loop */
223     huff = 0;                   /* starting code */
224     sym = 0;                    /* starting code symbol */
225     len = min;                  /* starting code length */
226     next = *table;              /* current table to fill in */
227     curr = root;                /* current table index bits */
228     drop = 0;                   /* current bits to drop from code for index */
229     low = (unsigned)(-1);       /* trigger new sub-table when len > root */
230     used = 1U << root;          /* use root table entries */
231     mask = used - 1;            /* mask for comparing low */
232 
233     /* check available table space */
234     if (type == LENS && used >= ENOUGH - MAXD)
235         return 1;
236 
237     /* process all codes and make table entries */
238     for (;;) {
239         /* create table entry */
240         this.bits = (unsigned char)(len - drop);
241         if (work[sym] + 1 < match) {
242             this.op = (unsigned char)0;
243             this.val = work[sym];
244         }
245         else if (work[sym] >= match) {
246             this.op = (unsigned char)(extra[work[sym] - match]);
247             this.val = base[work[sym] - match];
248         }
249         else {
250             this.op = (unsigned char)(32 + 64);         /* end of block */
251             this.val = 0;
252         }
253 
254         /* replicate for those indices with low len bits equal to huff */
255         incr = 1U << (len - drop);
256         fill = 1U << curr;
257         min = fill;                 /* save offset to next table */
258         do {
259             fill -= incr;
260             next[(huff >> drop) + fill] = this;
261         } while (fill != 0);
262 
263         /* backwards increment the len-bit code huff */
264         incr = 1U << (len - 1);
265         while (huff & incr)
266             incr >>= 1;
267         if (incr != 0) {
268             huff &= incr - 1;
269             huff += incr;
270         }
271         else
272             huff = 0;
273 
274         /* go to next symbol, update count, len */
275         sym++;
276         if (--(count[len]) == 0) {
277             if (len == max) break;
278             len = lens[work[sym]];
279         }
280 
281         /* create new sub-table if needed */
282         if (len > root && (huff & mask) != low) {
283             /* if first time, transition to sub-tables */
284             if (drop == 0)
285                 drop = root;
286 
287             /* increment past last table */
288             next += min;            /* here min is 1 << curr */
289 
290             /* determine length of next table */
291             curr = len - drop;
292             left = (int)(1 << curr);
293             while (curr + drop < max) {
294                 left -= count[curr + drop];
295                 if (left <= 0) break;
296                 curr++;
297                 left <<= 1;
298             }
299 
300             /* check for enough space */
301             used += 1U << curr;
302             if (type == LENS && used >= ENOUGH - MAXD)
303                 return 1;
304 
305             /* point entry in root table to sub-table */
306             low = huff & mask;
307             (*table)[low].op = (unsigned char)curr;
308             (*table)[low].bits = (unsigned char)root;
309             (*table)[low].val = (unsigned short)(next - *table);
310         }
311     }
312 
313     /*
314        Fill in rest of table for incomplete codes.  This loop is similar to the
315        loop above in incrementing huff for table indices.  It is assumed that
316        len is equal to curr + drop, so there is no loop needed to increment
317        through high index bits.  When the current sub-table is filled, the loop
318        drops back to the root table to fill in any remaining entries there.
319      */
320     this.op = (unsigned char)64;                /* invalid code marker */
321     this.bits = (unsigned char)(len - drop);
322     this.val = (unsigned short)0;
323     while (huff != 0) {
324         /* when done with sub-table, drop back to root table */
325         if (drop != 0 && (huff & mask) != low) {
326             drop = 0;
327             len = root;
328             next = *table;
329             this.bits = (unsigned char)len;
330         }
331 
332         /* put invalid code marker in table */
333         next[huff >> drop] = this;
334 
335         /* backwards increment the len-bit code huff */
336         incr = 1U << (len - 1);
337         while (huff & incr)
338             incr >>= 1;
339         if (incr != 0) {
340             huff &= incr - 1;
341             huff += incr;
342         }
343         else
344             huff = 0;
345     }
346 
347     /* set return parameters */
348     *table += used;
349     *bits = root;
350     return 0;
351 }
352