source: mainline/uspace/lib/compress/inflate.c@ 9c96634

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 9c96634 was 8d2dd7f2, checked in by Jakub Jermar <jakub@…>, 8 years ago

Reduce the number of files that include <sys/types.h>

  • Property mode set to 100644
File size: 17.5 KB
Line 
1/*
2 * Copyright (c) 2010 Mark Adler
3 * Copyright (c) 2010 Martin Decky
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @file
31 * @brief Implementation of inflate decompression
32 *
33 * A simple inflate implementation (decompression of `deflate' stream as
34 * described by RFC 1951) based on puff.c by Mark Adler. This code is
35 * currently not optimized for speed but for readability.
36 *
37 * All dynamically allocated memory memory is taken from the stack. The
38 * stack usage should be typically bounded by 2 KB.
39 *
40 * Original copyright notice:
41 *
42 * Copyright (C) 2002-2010 Mark Adler, all rights reserved
43 * version 2.1, 4 Apr 2010
44 *
45 * This software is provided 'as-is', without any express or implied
46 * warranty. In no event will the author be held liable for any damages
47 * arising from the use of this software.
48 *
49 * Permission is granted to anyone to use this software for any purpose,
50 * including commercial applications, and to alter it and redistribute it
51 * freely, subject to the following restrictions:
52 *
53 * 1. The origin of this software must not be misrepresented; you must not
54 * claim that you wrote the original software. If you use this software
55 * in a product, an acknowledgment in the product documentation would be
56 * appreciated but is not required.
57 * 2. Altered source versions must be plainly marked as such, and must not
58 * be misrepresented as being the original software.
59 * 3. This notice may not be removed or altered from any source
60 * distribution.
61 *
62 * Mark Adler <madler@alumni.caltech.edu>
63 *
64 */
65
66#include <stddef.h>
67#include <stdint.h>
68#include <stdbool.h>
69#include <errno.h>
70#include <mem.h>
71#include "inflate.h"
72
73/** Maximum bits in the Huffman code */
74#define MAX_HUFFMAN_BIT 15
75
76/** Number of length codes */
77#define MAX_LEN 29
78/** Number of distance codes */
79#define MAX_DIST 30
80/** Number of order codes */
81#define MAX_ORDER 19
82/** Number of literal/length codes */
83#define MAX_LITLEN 286
84/** Number of fixed literal/length codes */
85#define MAX_FIXED_LITLEN 288
86
87/** Number of all codes */
88#define MAX_CODE (MAX_LITLEN + MAX_DIST)
89
90/** Check for input buffer overrun condition */
91#define CHECK_OVERRUN(state) \
92 do { \
93 if ((state).overrun) \
94 return ELIMIT; \
95 } while (false)
96
97
98/** Inflate algorithm state
99 *
100 */
101typedef struct {
102 uint8_t *dest; /**< Output buffer */
103 size_t destlen; /**< Output buffer size */
104 size_t destcnt; /**< Position in the output buffer */
105
106 uint8_t *src; /**< Input buffer */
107 size_t srclen; /**< Input buffer size */
108 size_t srccnt; /**< Position in the input buffer */
109
110 uint16_t bitbuf; /**< Bit buffer */
111 size_t bitlen; /**< Number of bits in the bit buffer */
112
113 bool overrun; /**< Overrun condition */
114} inflate_state_t;
115
116/** Huffman code description
117 *
118 */
119typedef struct {
120 uint16_t *count; /**< Array of symbol counts */
121 uint16_t *symbol; /**< Array of symbols */
122} huffman_t;
123
124/** Length codes
125 *
126 */
127static const uint16_t lens[MAX_LEN] = {
128 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
129 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
130};
131
132/** Extended length codes
133 *
134 */
135static const uint16_t lens_ext[MAX_LEN] = {
136 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
137 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
138};
139
140/** Distance codes
141 *
142 */
143static const uint16_t dists[MAX_DIST] = {
144 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
145 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
146 8193, 12289, 16385, 24577
147};
148
149/** Extended distance codes
150 *
151 */
152static const uint16_t dists_ext[MAX_DIST] = {
153 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
154 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
155 12, 12, 13, 13
156};
157
158/** Order codes
159 *
160 */
161static const short order[MAX_ORDER] = {
162 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
163};
164
165/** Static length symbol counts
166 *
167 */
168static uint16_t len_count[MAX_HUFFMAN_BIT + 1] = {
169 0, 0, 0, 0, 0, 0, 0, 24, 152, 112, 0, 0, 0, 0, 0, 0
170};
171
172/** Static length symbols
173 *
174 */
175static uint16_t len_symbol[MAX_FIXED_LITLEN] = {
176 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268,
177 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 0, 1, 2,
178 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
179 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
180 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
181 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
182 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
183 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
184 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
185 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
186 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
187 140, 141, 142, 143, 280, 281, 282, 283, 284, 285, 286, 287, 144,
188 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
189 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
190 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183,
191 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
192 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
193 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
194 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235,
195 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248,
196 249, 250, 251, 252, 253, 254, 255
197};
198
199/** Static distance symbol counts
200 *
201 */
202static uint16_t dist_count[MAX_HUFFMAN_BIT + 1] = {
203 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
204};
205
206/** Static distance symbols
207 *
208 */
209static uint16_t dist_symbol[MAX_DIST] = {
210 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
211 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
212};
213
214/** Huffman code for lengths
215 *
216 */
217static huffman_t len_code = {
218 .count = len_count,
219 .symbol = len_symbol
220};
221
222/** Huffman code for distances
223 *
224 */
225static huffman_t dist_code = {
226 .count = dist_count,
227 .symbol = dist_symbol
228};
229
230/** Get bits from the bit buffer
231 *
232 * @param state Inflate state.
233 * @param cnt Number of bits to return (at most 16).
234 *
235 * @return Returned bits.
236 *
237 */
238static inline uint16_t get_bits(inflate_state_t *state, size_t cnt)
239{
240 /* Bit accumulator for at least 20 bits */
241 uint32_t val = state->bitbuf;
242
243 while (state->bitlen < cnt) {
244 if (state->srccnt == state->srclen) {
245 state->overrun = true;
246 return 0;
247 }
248
249 /* Load 8 more bits */
250 val |= ((uint32_t) state->src[state->srccnt]) << state->bitlen;
251 state->srccnt++;
252 state->bitlen += 8;
253 }
254
255 /* Update bits in the buffer */
256 state->bitbuf = (uint16_t) (val >> cnt);
257 state->bitlen -= cnt;
258
259 return ((uint16_t) (val & ((1 << cnt) - 1)));
260}
261
262/** Decode `stored' block
263 *
264 * @param state Inflate state.
265 *
266 * @return EOK on success.
267 * @return ELIMIT on input buffer overrun.
268 * @return ENOMEM on output buffer overrun.
269 * @return EINVAL on invalid data.
270 *
271 */
272static int inflate_stored(inflate_state_t *state)
273{
274 /* Discard bits in the bit buffer */
275 state->bitbuf = 0;
276 state->bitlen = 0;
277
278 if (state->srccnt + 4 > state->srclen)
279 return ELIMIT;
280
281 uint16_t len =
282 state->src[state->srccnt] | (state->src[state->srccnt + 1] << 8);
283 uint16_t len_compl =
284 state->src[state->srccnt + 2] | (state->src[state->srccnt + 3] << 8);
285
286 /* Check block length and its complement */
287 if (((int16_t) len) != ~((int16_t) len_compl))
288 return EINVAL;
289
290 state->srccnt += 4;
291
292 /* Check input buffer size */
293 if (state->srccnt + len > state->srclen)
294 return ELIMIT;
295
296 /* Check output buffer size */
297 if (state->destcnt + len > state->destlen)
298 return ENOMEM;
299
300 /* Copy data */
301 memcpy(state->dest + state->destcnt, state->src + state->srccnt, len);
302 state->srccnt += len;
303 state->destcnt += len;
304
305 return EOK;
306}
307
308/** Decode a symbol using the Huffman code
309 *
310 * @param state Inflate state.
311 * @param huffman Huffman code.
312 * @param symbol Decoded symbol.
313 *
314 * @param EOK on success.
315 * @param EINVAL on invalid Huffman code.
316 *
317 */
318static int huffman_decode(inflate_state_t *state, huffman_t *huffman,
319 uint16_t *symbol)
320{
321 uint16_t code = 0; /* Decoded bits */
322 size_t first = 0; /* First code of the given length */
323 size_t index = 0; /* Index of the first code of the given length
324 in the symbol table */
325
326 size_t len; /* Current number of bits in the code */
327 for (len = 1; len <= MAX_HUFFMAN_BIT; len++) {
328 /* Get next bit */
329 code |= get_bits(state, 1);
330 CHECK_OVERRUN(*state);
331
332 uint16_t count = huffman->count[len];
333 if (code < first + count) {
334 /* Return decoded symbol */
335 *symbol = huffman->symbol[index + code - first];
336 return EOK;
337 }
338
339 /* Update for next length */
340 index += count;
341 first += count;
342 first <<= 1;
343 code <<= 1;
344 }
345
346 return EINVAL;
347}
348
349/** Construct Huffman tables from canonical Huffman code
350 *
351 * @param huffman Constructed Huffman tables.
352 * @param length Lengths of the canonical Huffman code.
353 * @param n Number of lengths.
354 *
355 * @return 0 if the Huffman code set is complete.
356 * @return Negative value for an over-subscribed code set.
357 * @return Positive value for an incomplete code set.
358 *
359 */
360static int16_t huffman_construct(huffman_t *huffman, uint16_t *length, size_t n)
361{
362 /* Count number of codes for each length */
363 size_t len;
364 for (len = 0; len <= MAX_HUFFMAN_BIT; len++)
365 huffman->count[len] = 0;
366
367 /* We assume that the lengths are within bounds */
368 size_t symbol;
369 for (symbol = 0; symbol < n; symbol++)
370 huffman->count[length[symbol]]++;
371
372 if (huffman->count[0] == n) {
373 /* The code is complete, but decoding will fail */
374 return 0;
375 }
376
377 /* Check for an over-subscribed or incomplete set of lengths */
378 int16_t left = 1;
379 for (len = 1; len <= MAX_HUFFMAN_BIT; len++) {
380 left <<= 1;
381 left -= huffman->count[len];
382 if (left < 0) {
383 /* Over-subscribed */
384 return left;
385 }
386 }
387
388 /* Generate offsets into symbol table */
389 uint16_t offs[MAX_HUFFMAN_BIT + 1];
390
391 offs[1] = 0;
392 for (len = 1; len < MAX_HUFFMAN_BIT; len++)
393 offs[len + 1] = offs[len] + huffman->count[len];
394
395 for (symbol = 0; symbol < n; symbol++) {
396 if (length[symbol] != 0) {
397 huffman->symbol[offs[length[symbol]]] = symbol;
398 offs[length[symbol]]++;
399 }
400 }
401
402 return left;
403}
404
405/** Decode literal/length and distance codes
406 *
407 * Decode until end-of-block code.
408 *
409 * @param state Inflate state.
410 * @param len_code Huffman code for literal/length.
411 * @param dist_code Huffman code for distance.
412 *
413 * @return EOK on success.
414 * @return ENOENT on distance too large.
415 * @return EINVAL on invalid Huffman code.
416 * @return ELIMIT on input buffer overrun.
417 * @return ENOMEM on output buffer overrun.
418 *
419 */
420static int inflate_codes(inflate_state_t *state, huffman_t* len_code,
421 huffman_t* dist_code)
422{
423 uint16_t symbol;
424
425 do {
426 int err = huffman_decode(state, len_code, &symbol);
427 if (err != EOK) {
428 /* Error decoding */
429 return err;
430 }
431
432 if (symbol < 256) {
433 /* Write out literal */
434 if (state->destcnt == state->destlen)
435 return ENOMEM;
436
437 state->dest[state->destcnt] = (uint8_t) symbol;
438 state->destcnt++;
439 } else if (symbol > 256) {
440 /* Compute length */
441 symbol -= 257;
442 if (symbol >= 29)
443 return EINVAL;
444
445 size_t len = lens[symbol] + get_bits(state, lens_ext[symbol]);
446 CHECK_OVERRUN(*state);
447
448 /* Get distance */
449 err = huffman_decode(state, dist_code, &symbol);
450 if (err != EOK)
451 return err;
452
453 size_t dist = dists[symbol] + get_bits(state, dists_ext[symbol]);
454 if (dist > state->destcnt)
455 return ENOENT;
456
457 if (state->destcnt + len > state->destlen)
458 return ENOMEM;
459
460 while (len > 0) {
461 /* Copy len bytes from distance bytes back */
462 state->dest[state->destcnt]
463 = state->dest[state->destcnt - dist];
464 state->destcnt++;
465 len--;
466 }
467 }
468 } while (symbol != 256);
469
470 return EOK;
471}
472
473/** Decode `fixed codes' block
474 *
475 * @param state Inflate state.
476 * @param len_code Huffman code for literal/length.
477 * @param dist_code Huffman code for distance.
478 *
479 * @return EOK on success.
480 * @return ENOENT on distance too large.
481 * @return EINVAL on invalid Huffman code.
482 * @return ELIMIT on input buffer overrun.
483 * @return ENOMEM on output buffer overrun.
484 *
485 */
486static int inflate_fixed(inflate_state_t *state, huffman_t *len_code,
487 huffman_t *dist_code)
488{
489 return inflate_codes(state, len_code, dist_code);
490}
491
492/** Decode `dynamic codes' block
493 *
494 * @param state Inflate state.
495 *
496 * @return EOK on success.
497 * @return ENOENT on distance too large.
498 * @return EINVAL on invalid Huffman code.
499 * @return ELIMIT on input buffer overrun.
500 * @return ENOMEM on output buffer overrun.
501 *
502 */
503static int inflate_dynamic(inflate_state_t *state)
504{
505 uint16_t length[MAX_CODE];
506 uint16_t dyn_len_count[MAX_HUFFMAN_BIT + 1];
507 uint16_t dyn_len_symbol[MAX_LITLEN];
508 uint16_t dyn_dist_count[MAX_HUFFMAN_BIT + 1];
509 uint16_t dyn_dist_symbol[MAX_DIST];
510 huffman_t dyn_len_code;
511 huffman_t dyn_dist_code;
512
513 dyn_len_code.count = dyn_len_count;
514 dyn_len_code.symbol = dyn_len_symbol;
515
516 dyn_dist_code.count = dyn_dist_count;
517 dyn_dist_code.symbol = dyn_dist_symbol;
518
519 /* Get number of bits in each table */
520 uint16_t nlen = get_bits(state, 5) + 257;
521 CHECK_OVERRUN(*state);
522
523 uint16_t ndist = get_bits(state, 5) + 1;
524 CHECK_OVERRUN(*state);
525
526 uint16_t ncode = get_bits(state, 4) + 4;
527 CHECK_OVERRUN(*state);
528
529 if ((nlen > MAX_LITLEN) || (ndist > MAX_DIST)
530 || (ncode > MAX_ORDER))
531 return EINVAL;
532
533 /* Read code length code lengths */
534 uint16_t index;
535 for (index = 0; index < ncode; index++) {
536 length[order[index]] = get_bits(state, 3);
537 CHECK_OVERRUN(*state);
538 }
539
540 /* Set missing lengths to zero */
541 for (index = ncode; index < MAX_ORDER; index++)
542 length[order[index]] = 0;
543
544 /* Build Huffman code */
545 int16_t rc = huffman_construct(&dyn_len_code, length, MAX_ORDER);
546 if (rc != 0)
547 return EINVAL;
548
549 /* Read length/literal and distance code length tables */
550 index = 0;
551 while (index < nlen + ndist) {
552 uint16_t symbol;
553 int err = huffman_decode(state, &dyn_len_code, &symbol);
554 if (err != EOK)
555 return EOK;
556
557 if (symbol < 16) {
558 length[index] = symbol;
559 index++;
560 } else {
561 uint16_t len = 0;
562
563 if (symbol == 16) {
564 if (index == 0)
565 return EINVAL;
566
567 len = length[index - 1];
568 symbol = get_bits(state, 2) + 3;
569 CHECK_OVERRUN(*state);
570 } else if (symbol == 17) {
571 symbol = get_bits(state, 3) + 3;
572 CHECK_OVERRUN(*state);
573 } else {
574 symbol = get_bits(state, 7) + 11;
575 CHECK_OVERRUN(*state);
576 }
577
578 if (index + symbol > nlen + ndist)
579 return EINVAL;
580
581 while (symbol > 0) {
582 length[index] = len;
583 index++;
584 symbol--;
585 }
586 }
587 }
588
589 /* Check for end-of-block code */
590 if (length[256] == 0)
591 return EINVAL;
592
593 /* Build Huffman tables for literal/length codes */
594 rc = huffman_construct(&dyn_len_code, length, nlen);
595 if ((rc < 0) || ((rc > 0) && (dyn_len_code.count[0] + 1 != nlen)))
596 return EINVAL;
597
598 /* Build Huffman tables for distance codes */
599 rc = huffman_construct(&dyn_dist_code, length + nlen, ndist);
600 if ((rc < 0) || ((rc > 0) && (dyn_dist_code.count[0] + 1 != ndist)))
601 return EINVAL;
602
603 return inflate_codes(state, &dyn_len_code, &dyn_dist_code);
604}
605
606/** Inflate data
607 *
608 * @param src Source data buffer.
609 * @param srclen Source buffer size (bytes).
610 * @param dest Destination data buffer.
611 * @param destlen Destination buffer size (bytes).
612 *
613 * @return EOK on success.
614 * @return ENOENT on distance too large.
615 * @return EINVAL on invalid Huffman code or invalid deflate data.
616 * @return ELIMIT on input buffer overrun.
617 * @return ENOMEM on output buffer overrun.
618 *
619 */
620int inflate(void *src, size_t srclen, void *dest, size_t destlen)
621{
622 /* Initialize the state */
623 inflate_state_t state;
624
625 state.dest = (uint8_t *) dest;
626 state.destlen = destlen;
627 state.destcnt = 0;
628
629 state.src = (uint8_t *) src;
630 state.srclen = srclen;
631 state.srccnt = 0;
632
633 state.bitbuf = 0;
634 state.bitlen = 0;
635
636 state.overrun = false;
637
638 uint16_t last;
639 int ret = 0;
640
641 do {
642 /* Last block is indicated by a non-zero bit */
643 last = get_bits(&state, 1);
644 CHECK_OVERRUN(state);
645
646 /* Block type */
647 uint16_t type = get_bits(&state, 2);
648 CHECK_OVERRUN(state);
649
650 switch (type) {
651 case 0:
652 ret = inflate_stored(&state);
653 break;
654 case 1:
655 ret = inflate_fixed(&state, &len_code, &dist_code);
656 break;
657 case 2:
658 ret = inflate_dynamic(&state);
659 break;
660 default:
661 ret = EINVAL;
662 }
663 } while ((!last) && (ret == 0));
664
665 return ret;
666}
Note: See TracBrowser for help on using the repository browser.