source: mainline/uspace/lib/compress/inflate.c@ 0437dd5

Last change on this file since 0437dd5 was 09ab0a9a, checked in by Jiri Svoboda <jiri@…>, 7 years ago

Fix vertical spacing with new Ccheck revision.

  • Property mode set to 100644
File size: 17.4 KB
Line 
1/*
2 * Copyright (c) 2010 Mark Adler
3 * Copyright (c) 2010 Martin Decky
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @file
31 * @brief Implementation of inflate decompression
32 *
33 * A simple inflate implementation (decompression of `deflate' stream as
34 * described by RFC 1951) based on puff.c by Mark Adler. This code is
35 * currently not optimized for speed but for readability.
36 *
37 * All dynamically allocated memory memory is taken from the stack. The
38 * stack usage should be typically bounded by 2 KB.
39 *
40 * Original copyright notice:
41 *
42 * Copyright (C) 2002-2010 Mark Adler, all rights reserved
43 * version 2.1, 4 Apr 2010
44 *
45 * This software is provided 'as-is', without any express or implied
46 * warranty. In no event will the author be held liable for any damages
47 * arising from the use of this software.
48 *
49 * Permission is granted to anyone to use this software for any purpose,
50 * including commercial applications, and to alter it and redistribute it
51 * freely, subject to the following restrictions:
52 *
53 * 1. The origin of this software must not be misrepresented; you must not
54 * claim that you wrote the original software. If you use this software
55 * in a product, an acknowledgment in the product documentation would be
56 * appreciated but is not required.
57 * 2. Altered source versions must be plainly marked as such, and must not
58 * be misrepresented as being the original software.
59 * 3. This notice may not be removed or altered from any source
60 * distribution.
61 *
62 * Mark Adler <madler@alumni.caltech.edu>
63 *
64 */
65
66#include <stddef.h>
67#include <stdint.h>
68#include <stdbool.h>
69#include <errno.h>
70#include <mem.h>
71#include "inflate.h"
72
73/** Maximum bits in the Huffman code */
74#define MAX_HUFFMAN_BIT 15
75
76/** Number of length codes */
77#define MAX_LEN 29
78/** Number of distance codes */
79#define MAX_DIST 30
80/** Number of order codes */
81#define MAX_ORDER 19
82/** Number of literal/length codes */
83#define MAX_LITLEN 286
84/** Number of fixed literal/length codes */
85#define MAX_FIXED_LITLEN 288
86
87/** Number of all codes */
88#define MAX_CODE (MAX_LITLEN + MAX_DIST)
89
90/** Check for input buffer overrun condition */
91#define CHECK_OVERRUN(state) \
92 do { \
93 if ((state).overrun) \
94 return ELIMIT; \
95 } while (false)
96
97/** Inflate algorithm state
98 *
99 */
100typedef struct {
101 uint8_t *dest; /**< Output buffer */
102 size_t destlen; /**< Output buffer size */
103 size_t destcnt; /**< Position in the output buffer */
104
105 uint8_t *src; /**< Input buffer */
106 size_t srclen; /**< Input buffer size */
107 size_t srccnt; /**< Position in the input buffer */
108
109 uint16_t bitbuf; /**< Bit buffer */
110 size_t bitlen; /**< Number of bits in the bit buffer */
111
112 bool overrun; /**< Overrun condition */
113} inflate_state_t;
114
115/** Huffman code description
116 *
117 */
118typedef struct {
119 uint16_t *count; /**< Array of symbol counts */
120 uint16_t *symbol; /**< Array of symbols */
121} huffman_t;
122
123/** Length codes
124 *
125 */
126static const uint16_t lens[MAX_LEN] = {
127 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
128 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
129};
130
131/** Extended length codes
132 *
133 */
134static const uint16_t lens_ext[MAX_LEN] = {
135 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
136 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
137};
138
139/** Distance codes
140 *
141 */
142static const uint16_t dists[MAX_DIST] = {
143 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
144 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
145 8193, 12289, 16385, 24577
146};
147
148/** Extended distance codes
149 *
150 */
151static const uint16_t dists_ext[MAX_DIST] = {
152 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
153 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
154 12, 12, 13, 13
155};
156
157/** Order codes
158 *
159 */
160static const short order[MAX_ORDER] = {
161 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
162};
163
164/** Static length symbol counts
165 *
166 */
167static uint16_t len_count[MAX_HUFFMAN_BIT + 1] = {
168 0, 0, 0, 0, 0, 0, 0, 24, 152, 112, 0, 0, 0, 0, 0, 0
169};
170
171/** Static length symbols
172 *
173 */
174static uint16_t len_symbol[MAX_FIXED_LITLEN] = {
175 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268,
176 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 0, 1, 2,
177 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
178 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
179 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
180 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
181 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
182 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
183 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
184 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
185 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
186 140, 141, 142, 143, 280, 281, 282, 283, 284, 285, 286, 287, 144,
187 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
188 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
189 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183,
190 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
191 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
192 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
193 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235,
194 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248,
195 249, 250, 251, 252, 253, 254, 255
196};
197
198/** Static distance symbol counts
199 *
200 */
201static uint16_t dist_count[MAX_HUFFMAN_BIT + 1] = {
202 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
203};
204
205/** Static distance symbols
206 *
207 */
208static uint16_t dist_symbol[MAX_DIST] = {
209 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
210 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
211};
212
213/** Huffman code for lengths
214 *
215 */
216static huffman_t len_code = {
217 .count = len_count,
218 .symbol = len_symbol
219};
220
221/** Huffman code for distances
222 *
223 */
224static huffman_t dist_code = {
225 .count = dist_count,
226 .symbol = dist_symbol
227};
228
229/** Get bits from the bit buffer
230 *
231 * @param state Inflate state.
232 * @param cnt Number of bits to return (at most 16).
233 *
234 * @return Returned bits.
235 *
236 */
237static inline uint16_t get_bits(inflate_state_t *state, size_t cnt)
238{
239 /* Bit accumulator for at least 20 bits */
240 uint32_t val = state->bitbuf;
241
242 while (state->bitlen < cnt) {
243 if (state->srccnt == state->srclen) {
244 state->overrun = true;
245 return 0;
246 }
247
248 /* Load 8 more bits */
249 val |= ((uint32_t) state->src[state->srccnt]) << state->bitlen;
250 state->srccnt++;
251 state->bitlen += 8;
252 }
253
254 /* Update bits in the buffer */
255 state->bitbuf = (uint16_t) (val >> cnt);
256 state->bitlen -= cnt;
257
258 return ((uint16_t) (val & ((1 << cnt) - 1)));
259}
260
261/** Decode `stored' block
262 *
263 * @param state Inflate state.
264 *
265 * @return EOK on success.
266 * @return ELIMIT on input buffer overrun.
267 * @return ENOMEM on output buffer overrun.
268 * @return EINVAL on invalid data.
269 *
270 */
271static errno_t inflate_stored(inflate_state_t *state)
272{
273 /* Discard bits in the bit buffer */
274 state->bitbuf = 0;
275 state->bitlen = 0;
276
277 if (state->srccnt + 4 > state->srclen)
278 return ELIMIT;
279
280 uint16_t len =
281 state->src[state->srccnt] | (state->src[state->srccnt + 1] << 8);
282 uint16_t len_compl =
283 state->src[state->srccnt + 2] | (state->src[state->srccnt + 3] << 8);
284
285 /* Check block length and its complement */
286 if (((int16_t) len) != ~((int16_t) len_compl))
287 return EINVAL;
288
289 state->srccnt += 4;
290
291 /* Check input buffer size */
292 if (state->srccnt + len > state->srclen)
293 return ELIMIT;
294
295 /* Check output buffer size */
296 if (state->destcnt + len > state->destlen)
297 return ENOMEM;
298
299 /* Copy data */
300 memcpy(state->dest + state->destcnt, state->src + state->srccnt, len);
301 state->srccnt += len;
302 state->destcnt += len;
303
304 return EOK;
305}
306
307/** Decode a symbol using the Huffman code
308 *
309 * @param state Inflate state.
310 * @param huffman Huffman code.
311 * @param symbol Decoded symbol.
312 *
313 * @param EOK on success.
314 * @param EINVAL on invalid Huffman code.
315 *
316 */
317static errno_t huffman_decode(inflate_state_t *state, huffman_t *huffman,
318 uint16_t *symbol)
319{
320 /* Decode bits */
321 uint16_t code = 0;
322
323 /* First code of the given length */
324 size_t first = 0;
325
326 /*
327 * Index of the first code of the given length
328 * in the symbol table
329 */
330 size_t index = 0;
331
332 /* Current number of bits in the code */
333 size_t len;
334
335 for (len = 1; len <= MAX_HUFFMAN_BIT; len++) {
336 /* Get next bit */
337 code |= get_bits(state, 1);
338 CHECK_OVERRUN(*state);
339
340 uint16_t count = huffman->count[len];
341 if (code < first + count) {
342 /* Return decoded symbol */
343 *symbol = huffman->symbol[index + code - first];
344 return EOK;
345 }
346
347 /* Update for next length */
348 index += count;
349 first += count;
350 first <<= 1;
351 code <<= 1;
352 }
353
354 return EINVAL;
355}
356
357/** Construct Huffman tables from canonical Huffman code
358 *
359 * @param huffman Constructed Huffman tables.
360 * @param length Lengths of the canonical Huffman code.
361 * @param n Number of lengths.
362 *
363 * @return 0 if the Huffman code set is complete.
364 * @return Negative value for an over-subscribed code set.
365 * @return Positive value for an incomplete code set.
366 *
367 */
368static int16_t huffman_construct(huffman_t *huffman, uint16_t *length, size_t n)
369{
370 /* Count number of codes for each length */
371 size_t len;
372 for (len = 0; len <= MAX_HUFFMAN_BIT; len++)
373 huffman->count[len] = 0;
374
375 /* We assume that the lengths are within bounds */
376 size_t symbol;
377 for (symbol = 0; symbol < n; symbol++)
378 huffman->count[length[symbol]]++;
379
380 if (huffman->count[0] == n) {
381 /* The code is complete, but decoding will fail */
382 return 0;
383 }
384
385 /* Check for an over-subscribed or incomplete set of lengths */
386 int16_t left = 1;
387 for (len = 1; len <= MAX_HUFFMAN_BIT; len++) {
388 left <<= 1;
389 left -= huffman->count[len];
390 if (left < 0) {
391 /* Over-subscribed */
392 return left;
393 }
394 }
395
396 /* Generate offsets into symbol table */
397 uint16_t offs[MAX_HUFFMAN_BIT + 1];
398
399 offs[1] = 0;
400 for (len = 1; len < MAX_HUFFMAN_BIT; len++)
401 offs[len + 1] = offs[len] + huffman->count[len];
402
403 for (symbol = 0; symbol < n; symbol++) {
404 if (length[symbol] != 0) {
405 huffman->symbol[offs[length[symbol]]] = symbol;
406 offs[length[symbol]]++;
407 }
408 }
409
410 return left;
411}
412
413/** Decode literal/length and distance codes
414 *
415 * Decode until end-of-block code.
416 *
417 * @param state Inflate state.
418 * @param len_code Huffman code for literal/length.
419 * @param dist_code Huffman code for distance.
420 *
421 * @return EOK on success.
422 * @return ENOENT on distance too large.
423 * @return EINVAL on invalid Huffman code.
424 * @return ELIMIT on input buffer overrun.
425 * @return ENOMEM on output buffer overrun.
426 *
427 */
428static errno_t inflate_codes(inflate_state_t *state, huffman_t *len_code,
429 huffman_t *dist_code)
430{
431 uint16_t symbol;
432
433 do {
434 errno_t err = huffman_decode(state, len_code, &symbol);
435 if (err != EOK) {
436 /* Error decoding */
437 return err;
438 }
439
440 if (symbol < 256) {
441 /* Write out literal */
442 if (state->destcnt == state->destlen)
443 return ENOMEM;
444
445 state->dest[state->destcnt] = (uint8_t) symbol;
446 state->destcnt++;
447 } else if (symbol > 256) {
448 /* Compute length */
449 symbol -= 257;
450 if (symbol >= 29)
451 return EINVAL;
452
453 size_t len = lens[symbol] + get_bits(state, lens_ext[symbol]);
454 CHECK_OVERRUN(*state);
455
456 /* Get distance */
457 err = huffman_decode(state, dist_code, &symbol);
458 if (err != EOK)
459 return err;
460
461 size_t dist = dists[symbol] + get_bits(state, dists_ext[symbol]);
462 if (dist > state->destcnt)
463 return ENOENT;
464
465 if (state->destcnt + len > state->destlen)
466 return ENOMEM;
467
468 while (len > 0) {
469 /* Copy len bytes from distance bytes back */
470 state->dest[state->destcnt] =
471 state->dest[state->destcnt - dist];
472 state->destcnt++;
473 len--;
474 }
475 }
476 } while (symbol != 256);
477
478 return EOK;
479}
480
481/** Decode `fixed codes' block
482 *
483 * @param state Inflate state.
484 * @param len_code Huffman code for literal/length.
485 * @param dist_code Huffman code for distance.
486 *
487 * @return EOK on success.
488 * @return ENOENT on distance too large.
489 * @return EINVAL on invalid Huffman code.
490 * @return ELIMIT on input buffer overrun.
491 * @return ENOMEM on output buffer overrun.
492 *
493 */
494static errno_t inflate_fixed(inflate_state_t *state, huffman_t *len_code,
495 huffman_t *dist_code)
496{
497 return inflate_codes(state, len_code, dist_code);
498}
499
500/** Decode `dynamic codes' block
501 *
502 * @param state Inflate state.
503 *
504 * @return EOK on success.
505 * @return ENOENT on distance too large.
506 * @return EINVAL on invalid Huffman code.
507 * @return ELIMIT on input buffer overrun.
508 * @return ENOMEM on output buffer overrun.
509 *
510 */
511static errno_t inflate_dynamic(inflate_state_t *state)
512{
513 uint16_t length[MAX_CODE];
514 uint16_t dyn_len_count[MAX_HUFFMAN_BIT + 1];
515 uint16_t dyn_len_symbol[MAX_LITLEN];
516 uint16_t dyn_dist_count[MAX_HUFFMAN_BIT + 1];
517 uint16_t dyn_dist_symbol[MAX_DIST];
518 huffman_t dyn_len_code;
519 huffman_t dyn_dist_code;
520
521 dyn_len_code.count = dyn_len_count;
522 dyn_len_code.symbol = dyn_len_symbol;
523
524 dyn_dist_code.count = dyn_dist_count;
525 dyn_dist_code.symbol = dyn_dist_symbol;
526
527 /* Get number of bits in each table */
528 uint16_t nlen = get_bits(state, 5) + 257;
529 CHECK_OVERRUN(*state);
530
531 uint16_t ndist = get_bits(state, 5) + 1;
532 CHECK_OVERRUN(*state);
533
534 uint16_t ncode = get_bits(state, 4) + 4;
535 CHECK_OVERRUN(*state);
536
537 if ((nlen > MAX_LITLEN) || (ndist > MAX_DIST) ||
538 (ncode > MAX_ORDER))
539 return EINVAL;
540
541 /* Read code length code lengths */
542 uint16_t index;
543 for (index = 0; index < ncode; index++) {
544 length[order[index]] = get_bits(state, 3);
545 CHECK_OVERRUN(*state);
546 }
547
548 /* Set missing lengths to zero */
549 for (index = ncode; index < MAX_ORDER; index++)
550 length[order[index]] = 0;
551
552 /* Build Huffman code */
553 int16_t rc = huffman_construct(&dyn_len_code, length, MAX_ORDER);
554 if (rc != 0)
555 return EINVAL;
556
557 /* Read length/literal and distance code length tables */
558 index = 0;
559 while (index < nlen + ndist) {
560 uint16_t symbol;
561 errno_t err = huffman_decode(state, &dyn_len_code, &symbol);
562 if (err != EOK)
563 return EOK;
564
565 if (symbol < 16) {
566 length[index] = symbol;
567 index++;
568 } else {
569 uint16_t len = 0;
570
571 if (symbol == 16) {
572 if (index == 0)
573 return EINVAL;
574
575 len = length[index - 1];
576 symbol = get_bits(state, 2) + 3;
577 CHECK_OVERRUN(*state);
578 } else if (symbol == 17) {
579 symbol = get_bits(state, 3) + 3;
580 CHECK_OVERRUN(*state);
581 } else {
582 symbol = get_bits(state, 7) + 11;
583 CHECK_OVERRUN(*state);
584 }
585
586 if (index + symbol > nlen + ndist)
587 return EINVAL;
588
589 while (symbol > 0) {
590 length[index] = len;
591 index++;
592 symbol--;
593 }
594 }
595 }
596
597 /* Check for end-of-block code */
598 if (length[256] == 0)
599 return EINVAL;
600
601 /* Build Huffman tables for literal/length codes */
602 rc = huffman_construct(&dyn_len_code, length, nlen);
603 if ((rc < 0) || ((rc > 0) && (dyn_len_code.count[0] + 1 != nlen)))
604 return EINVAL;
605
606 /* Build Huffman tables for distance codes */
607 rc = huffman_construct(&dyn_dist_code, length + nlen, ndist);
608 if ((rc < 0) || ((rc > 0) && (dyn_dist_code.count[0] + 1 != ndist)))
609 return EINVAL;
610
611 return inflate_codes(state, &dyn_len_code, &dyn_dist_code);
612}
613
614/** Inflate data
615 *
616 * @param src Source data buffer.
617 * @param srclen Source buffer size (bytes).
618 * @param dest Destination data buffer.
619 * @param destlen Destination buffer size (bytes).
620 *
621 * @return EOK on success.
622 * @return ENOENT on distance too large.
623 * @return EINVAL on invalid Huffman code or invalid deflate data.
624 * @return ELIMIT on input buffer overrun.
625 * @return ENOMEM on output buffer overrun.
626 *
627 */
628errno_t inflate(void *src, size_t srclen, void *dest, size_t destlen)
629{
630 /* Initialize the state */
631 inflate_state_t state;
632
633 state.dest = (uint8_t *) dest;
634 state.destlen = destlen;
635 state.destcnt = 0;
636
637 state.src = (uint8_t *) src;
638 state.srclen = srclen;
639 state.srccnt = 0;
640
641 state.bitbuf = 0;
642 state.bitlen = 0;
643
644 state.overrun = false;
645
646 uint16_t last;
647 errno_t ret = EOK;
648
649 do {
650 /* Last block is indicated by a non-zero bit */
651 last = get_bits(&state, 1);
652 CHECK_OVERRUN(state);
653
654 /* Block type */
655 uint16_t type = get_bits(&state, 2);
656 CHECK_OVERRUN(state);
657
658 switch (type) {
659 case 0:
660 ret = inflate_stored(&state);
661 break;
662 case 1:
663 ret = inflate_fixed(&state, &len_code, &dist_code);
664 break;
665 case 2:
666 ret = inflate_dynamic(&state);
667 break;
668 default:
669 ret = EINVAL;
670 }
671 } while ((!last) && (ret == 0));
672
673 return ret;
674}
Note: See TracBrowser for help on using the repository browser.