source: mainline/uspace/lib/compress/inflate.c@ cf573ec

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since cf573ec was 904b1bc, checked in by Jiri Svoboda <jiri@…>, 7 years ago

Fix remaining ccheck issues.

  • Property mode set to 100644
File size: 17.4 KB
Line 
1/*
2 * Copyright (c) 2010 Mark Adler
3 * Copyright (c) 2010 Martin Decky
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @file
31 * @brief Implementation of inflate decompression
32 *
33 * A simple inflate implementation (decompression of `deflate' stream as
34 * described by RFC 1951) based on puff.c by Mark Adler. This code is
35 * currently not optimized for speed but for readability.
36 *
37 * All dynamically allocated memory memory is taken from the stack. The
38 * stack usage should be typically bounded by 2 KB.
39 *
40 * Original copyright notice:
41 *
42 * Copyright (C) 2002-2010 Mark Adler, all rights reserved
43 * version 2.1, 4 Apr 2010
44 *
45 * This software is provided 'as-is', without any express or implied
46 * warranty. In no event will the author be held liable for any damages
47 * arising from the use of this software.
48 *
49 * Permission is granted to anyone to use this software for any purpose,
50 * including commercial applications, and to alter it and redistribute it
51 * freely, subject to the following restrictions:
52 *
53 * 1. The origin of this software must not be misrepresented; you must not
54 * claim that you wrote the original software. If you use this software
55 * in a product, an acknowledgment in the product documentation would be
56 * appreciated but is not required.
57 * 2. Altered source versions must be plainly marked as such, and must not
58 * be misrepresented as being the original software.
59 * 3. This notice may not be removed or altered from any source
60 * distribution.
61 *
62 * Mark Adler <madler@alumni.caltech.edu>
63 *
64 */
65
66#include <stddef.h>
67#include <stdint.h>
68#include <stdbool.h>
69#include <errno.h>
70#include <mem.h>
71#include "inflate.h"
72
73/** Maximum bits in the Huffman code */
74#define MAX_HUFFMAN_BIT 15
75
76/** Number of length codes */
77#define MAX_LEN 29
78/** Number of distance codes */
79#define MAX_DIST 30
80/** Number of order codes */
81#define MAX_ORDER 19
82/** Number of literal/length codes */
83#define MAX_LITLEN 286
84/** Number of fixed literal/length codes */
85#define MAX_FIXED_LITLEN 288
86
87/** Number of all codes */
88#define MAX_CODE (MAX_LITLEN + MAX_DIST)
89
90/** Check for input buffer overrun condition */
91#define CHECK_OVERRUN(state) \
92 do { \
93 if ((state).overrun) \
94 return ELIMIT; \
95 } while (false)
96
97
98/** Inflate algorithm state
99 *
100 */
101typedef struct {
102 uint8_t *dest; /**< Output buffer */
103 size_t destlen; /**< Output buffer size */
104 size_t destcnt; /**< Position in the output buffer */
105
106 uint8_t *src; /**< Input buffer */
107 size_t srclen; /**< Input buffer size */
108 size_t srccnt; /**< Position in the input buffer */
109
110 uint16_t bitbuf; /**< Bit buffer */
111 size_t bitlen; /**< Number of bits in the bit buffer */
112
113 bool overrun; /**< Overrun condition */
114} inflate_state_t;
115
116/** Huffman code description
117 *
118 */
119typedef struct {
120 uint16_t *count; /**< Array of symbol counts */
121 uint16_t *symbol; /**< Array of symbols */
122} huffman_t;
123
124/** Length codes
125 *
126 */
127static const uint16_t lens[MAX_LEN] = {
128 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
129 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
130};
131
132/** Extended length codes
133 *
134 */
135static const uint16_t lens_ext[MAX_LEN] = {
136 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
137 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
138};
139
140/** Distance codes
141 *
142 */
143static const uint16_t dists[MAX_DIST] = {
144 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
145 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
146 8193, 12289, 16385, 24577
147};
148
149/** Extended distance codes
150 *
151 */
152static const uint16_t dists_ext[MAX_DIST] = {
153 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
154 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
155 12, 12, 13, 13
156};
157
158/** Order codes
159 *
160 */
161static const short order[MAX_ORDER] = {
162 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
163};
164
165/** Static length symbol counts
166 *
167 */
168static uint16_t len_count[MAX_HUFFMAN_BIT + 1] = {
169 0, 0, 0, 0, 0, 0, 0, 24, 152, 112, 0, 0, 0, 0, 0, 0
170};
171
172/** Static length symbols
173 *
174 */
175static uint16_t len_symbol[MAX_FIXED_LITLEN] = {
176 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268,
177 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 0, 1, 2,
178 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
179 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
180 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
181 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
182 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
183 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
184 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
185 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
186 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
187 140, 141, 142, 143, 280, 281, 282, 283, 284, 285, 286, 287, 144,
188 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
189 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
190 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183,
191 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
192 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
193 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
194 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235,
195 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248,
196 249, 250, 251, 252, 253, 254, 255
197};
198
199/** Static distance symbol counts
200 *
201 */
202static uint16_t dist_count[MAX_HUFFMAN_BIT + 1] = {
203 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
204};
205
206/** Static distance symbols
207 *
208 */
209static uint16_t dist_symbol[MAX_DIST] = {
210 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
211 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
212};
213
214/** Huffman code for lengths
215 *
216 */
217static huffman_t len_code = {
218 .count = len_count,
219 .symbol = len_symbol
220};
221
222/** Huffman code for distances
223 *
224 */
225static huffman_t dist_code = {
226 .count = dist_count,
227 .symbol = dist_symbol
228};
229
230/** Get bits from the bit buffer
231 *
232 * @param state Inflate state.
233 * @param cnt Number of bits to return (at most 16).
234 *
235 * @return Returned bits.
236 *
237 */
238static inline uint16_t get_bits(inflate_state_t *state, size_t cnt)
239{
240 /* Bit accumulator for at least 20 bits */
241 uint32_t val = state->bitbuf;
242
243 while (state->bitlen < cnt) {
244 if (state->srccnt == state->srclen) {
245 state->overrun = true;
246 return 0;
247 }
248
249 /* Load 8 more bits */
250 val |= ((uint32_t) state->src[state->srccnt]) << state->bitlen;
251 state->srccnt++;
252 state->bitlen += 8;
253 }
254
255 /* Update bits in the buffer */
256 state->bitbuf = (uint16_t) (val >> cnt);
257 state->bitlen -= cnt;
258
259 return ((uint16_t) (val & ((1 << cnt) - 1)));
260}
261
262/** Decode `stored' block
263 *
264 * @param state Inflate state.
265 *
266 * @return EOK on success.
267 * @return ELIMIT on input buffer overrun.
268 * @return ENOMEM on output buffer overrun.
269 * @return EINVAL on invalid data.
270 *
271 */
272static errno_t inflate_stored(inflate_state_t *state)
273{
274 /* Discard bits in the bit buffer */
275 state->bitbuf = 0;
276 state->bitlen = 0;
277
278 if (state->srccnt + 4 > state->srclen)
279 return ELIMIT;
280
281 uint16_t len =
282 state->src[state->srccnt] | (state->src[state->srccnt + 1] << 8);
283 uint16_t len_compl =
284 state->src[state->srccnt + 2] | (state->src[state->srccnt + 3] << 8);
285
286 /* Check block length and its complement */
287 if (((int16_t) len) != ~((int16_t) len_compl))
288 return EINVAL;
289
290 state->srccnt += 4;
291
292 /* Check input buffer size */
293 if (state->srccnt + len > state->srclen)
294 return ELIMIT;
295
296 /* Check output buffer size */
297 if (state->destcnt + len > state->destlen)
298 return ENOMEM;
299
300 /* Copy data */
301 memcpy(state->dest + state->destcnt, state->src + state->srccnt, len);
302 state->srccnt += len;
303 state->destcnt += len;
304
305 return EOK;
306}
307
308/** Decode a symbol using the Huffman code
309 *
310 * @param state Inflate state.
311 * @param huffman Huffman code.
312 * @param symbol Decoded symbol.
313 *
314 * @param EOK on success.
315 * @param EINVAL on invalid Huffman code.
316 *
317 */
318static errno_t huffman_decode(inflate_state_t *state, huffman_t *huffman,
319 uint16_t *symbol)
320{
321 /* Decode bits */
322 uint16_t code = 0;
323
324 /* First code of the given length */
325 size_t first = 0;
326
327 /*
328 * Index of the first code of the given length
329 * in the symbol table
330 */
331 size_t index = 0;
332
333 /* Current number of bits in the code */
334 size_t len;
335
336 for (len = 1; len <= MAX_HUFFMAN_BIT; len++) {
337 /* Get next bit */
338 code |= get_bits(state, 1);
339 CHECK_OVERRUN(*state);
340
341 uint16_t count = huffman->count[len];
342 if (code < first + count) {
343 /* Return decoded symbol */
344 *symbol = huffman->symbol[index + code - first];
345 return EOK;
346 }
347
348 /* Update for next length */
349 index += count;
350 first += count;
351 first <<= 1;
352 code <<= 1;
353 }
354
355 return EINVAL;
356}
357
358/** Construct Huffman tables from canonical Huffman code
359 *
360 * @param huffman Constructed Huffman tables.
361 * @param length Lengths of the canonical Huffman code.
362 * @param n Number of lengths.
363 *
364 * @return 0 if the Huffman code set is complete.
365 * @return Negative value for an over-subscribed code set.
366 * @return Positive value for an incomplete code set.
367 *
368 */
369static int16_t huffman_construct(huffman_t *huffman, uint16_t *length, size_t n)
370{
371 /* Count number of codes for each length */
372 size_t len;
373 for (len = 0; len <= MAX_HUFFMAN_BIT; len++)
374 huffman->count[len] = 0;
375
376 /* We assume that the lengths are within bounds */
377 size_t symbol;
378 for (symbol = 0; symbol < n; symbol++)
379 huffman->count[length[symbol]]++;
380
381 if (huffman->count[0] == n) {
382 /* The code is complete, but decoding will fail */
383 return 0;
384 }
385
386 /* Check for an over-subscribed or incomplete set of lengths */
387 int16_t left = 1;
388 for (len = 1; len <= MAX_HUFFMAN_BIT; len++) {
389 left <<= 1;
390 left -= huffman->count[len];
391 if (left < 0) {
392 /* Over-subscribed */
393 return left;
394 }
395 }
396
397 /* Generate offsets into symbol table */
398 uint16_t offs[MAX_HUFFMAN_BIT + 1];
399
400 offs[1] = 0;
401 for (len = 1; len < MAX_HUFFMAN_BIT; len++)
402 offs[len + 1] = offs[len] + huffman->count[len];
403
404 for (symbol = 0; symbol < n; symbol++) {
405 if (length[symbol] != 0) {
406 huffman->symbol[offs[length[symbol]]] = symbol;
407 offs[length[symbol]]++;
408 }
409 }
410
411 return left;
412}
413
414/** Decode literal/length and distance codes
415 *
416 * Decode until end-of-block code.
417 *
418 * @param state Inflate state.
419 * @param len_code Huffman code for literal/length.
420 * @param dist_code Huffman code for distance.
421 *
422 * @return EOK on success.
423 * @return ENOENT on distance too large.
424 * @return EINVAL on invalid Huffman code.
425 * @return ELIMIT on input buffer overrun.
426 * @return ENOMEM on output buffer overrun.
427 *
428 */
429static errno_t inflate_codes(inflate_state_t *state, huffman_t *len_code,
430 huffman_t *dist_code)
431{
432 uint16_t symbol;
433
434 do {
435 errno_t err = huffman_decode(state, len_code, &symbol);
436 if (err != EOK) {
437 /* Error decoding */
438 return err;
439 }
440
441 if (symbol < 256) {
442 /* Write out literal */
443 if (state->destcnt == state->destlen)
444 return ENOMEM;
445
446 state->dest[state->destcnt] = (uint8_t) symbol;
447 state->destcnt++;
448 } else if (symbol > 256) {
449 /* Compute length */
450 symbol -= 257;
451 if (symbol >= 29)
452 return EINVAL;
453
454 size_t len = lens[symbol] + get_bits(state, lens_ext[symbol]);
455 CHECK_OVERRUN(*state);
456
457 /* Get distance */
458 err = huffman_decode(state, dist_code, &symbol);
459 if (err != EOK)
460 return err;
461
462 size_t dist = dists[symbol] + get_bits(state, dists_ext[symbol]);
463 if (dist > state->destcnt)
464 return ENOENT;
465
466 if (state->destcnt + len > state->destlen)
467 return ENOMEM;
468
469 while (len > 0) {
470 /* Copy len bytes from distance bytes back */
471 state->dest[state->destcnt] =
472 state->dest[state->destcnt - dist];
473 state->destcnt++;
474 len--;
475 }
476 }
477 } while (symbol != 256);
478
479 return EOK;
480}
481
482/** Decode `fixed codes' block
483 *
484 * @param state Inflate state.
485 * @param len_code Huffman code for literal/length.
486 * @param dist_code Huffman code for distance.
487 *
488 * @return EOK on success.
489 * @return ENOENT on distance too large.
490 * @return EINVAL on invalid Huffman code.
491 * @return ELIMIT on input buffer overrun.
492 * @return ENOMEM on output buffer overrun.
493 *
494 */
495static errno_t inflate_fixed(inflate_state_t *state, huffman_t *len_code,
496 huffman_t *dist_code)
497{
498 return inflate_codes(state, len_code, dist_code);
499}
500
501/** Decode `dynamic codes' block
502 *
503 * @param state Inflate state.
504 *
505 * @return EOK on success.
506 * @return ENOENT on distance too large.
507 * @return EINVAL on invalid Huffman code.
508 * @return ELIMIT on input buffer overrun.
509 * @return ENOMEM on output buffer overrun.
510 *
511 */
512static errno_t inflate_dynamic(inflate_state_t *state)
513{
514 uint16_t length[MAX_CODE];
515 uint16_t dyn_len_count[MAX_HUFFMAN_BIT + 1];
516 uint16_t dyn_len_symbol[MAX_LITLEN];
517 uint16_t dyn_dist_count[MAX_HUFFMAN_BIT + 1];
518 uint16_t dyn_dist_symbol[MAX_DIST];
519 huffman_t dyn_len_code;
520 huffman_t dyn_dist_code;
521
522 dyn_len_code.count = dyn_len_count;
523 dyn_len_code.symbol = dyn_len_symbol;
524
525 dyn_dist_code.count = dyn_dist_count;
526 dyn_dist_code.symbol = dyn_dist_symbol;
527
528 /* Get number of bits in each table */
529 uint16_t nlen = get_bits(state, 5) + 257;
530 CHECK_OVERRUN(*state);
531
532 uint16_t ndist = get_bits(state, 5) + 1;
533 CHECK_OVERRUN(*state);
534
535 uint16_t ncode = get_bits(state, 4) + 4;
536 CHECK_OVERRUN(*state);
537
538 if ((nlen > MAX_LITLEN) || (ndist > MAX_DIST) ||
539 (ncode > MAX_ORDER))
540 return EINVAL;
541
542 /* Read code length code lengths */
543 uint16_t index;
544 for (index = 0; index < ncode; index++) {
545 length[order[index]] = get_bits(state, 3);
546 CHECK_OVERRUN(*state);
547 }
548
549 /* Set missing lengths to zero */
550 for (index = ncode; index < MAX_ORDER; index++)
551 length[order[index]] = 0;
552
553 /* Build Huffman code */
554 int16_t rc = huffman_construct(&dyn_len_code, length, MAX_ORDER);
555 if (rc != 0)
556 return EINVAL;
557
558 /* Read length/literal and distance code length tables */
559 index = 0;
560 while (index < nlen + ndist) {
561 uint16_t symbol;
562 errno_t err = huffman_decode(state, &dyn_len_code, &symbol);
563 if (err != EOK)
564 return EOK;
565
566 if (symbol < 16) {
567 length[index] = symbol;
568 index++;
569 } else {
570 uint16_t len = 0;
571
572 if (symbol == 16) {
573 if (index == 0)
574 return EINVAL;
575
576 len = length[index - 1];
577 symbol = get_bits(state, 2) + 3;
578 CHECK_OVERRUN(*state);
579 } else if (symbol == 17) {
580 symbol = get_bits(state, 3) + 3;
581 CHECK_OVERRUN(*state);
582 } else {
583 symbol = get_bits(state, 7) + 11;
584 CHECK_OVERRUN(*state);
585 }
586
587 if (index + symbol > nlen + ndist)
588 return EINVAL;
589
590 while (symbol > 0) {
591 length[index] = len;
592 index++;
593 symbol--;
594 }
595 }
596 }
597
598 /* Check for end-of-block code */
599 if (length[256] == 0)
600 return EINVAL;
601
602 /* Build Huffman tables for literal/length codes */
603 rc = huffman_construct(&dyn_len_code, length, nlen);
604 if ((rc < 0) || ((rc > 0) && (dyn_len_code.count[0] + 1 != nlen)))
605 return EINVAL;
606
607 /* Build Huffman tables for distance codes */
608 rc = huffman_construct(&dyn_dist_code, length + nlen, ndist);
609 if ((rc < 0) || ((rc > 0) && (dyn_dist_code.count[0] + 1 != ndist)))
610 return EINVAL;
611
612 return inflate_codes(state, &dyn_len_code, &dyn_dist_code);
613}
614
615/** Inflate data
616 *
617 * @param src Source data buffer.
618 * @param srclen Source buffer size (bytes).
619 * @param dest Destination data buffer.
620 * @param destlen Destination buffer size (bytes).
621 *
622 * @return EOK on success.
623 * @return ENOENT on distance too large.
624 * @return EINVAL on invalid Huffman code or invalid deflate data.
625 * @return ELIMIT on input buffer overrun.
626 * @return ENOMEM on output buffer overrun.
627 *
628 */
629errno_t inflate(void *src, size_t srclen, void *dest, size_t destlen)
630{
631 /* Initialize the state */
632 inflate_state_t state;
633
634 state.dest = (uint8_t *) dest;
635 state.destlen = destlen;
636 state.destcnt = 0;
637
638 state.src = (uint8_t *) src;
639 state.srclen = srclen;
640 state.srccnt = 0;
641
642 state.bitbuf = 0;
643 state.bitlen = 0;
644
645 state.overrun = false;
646
647 uint16_t last;
648 errno_t ret = EOK;
649
650 do {
651 /* Last block is indicated by a non-zero bit */
652 last = get_bits(&state, 1);
653 CHECK_OVERRUN(state);
654
655 /* Block type */
656 uint16_t type = get_bits(&state, 2);
657 CHECK_OVERRUN(state);
658
659 switch (type) {
660 case 0:
661 ret = inflate_stored(&state);
662 break;
663 case 1:
664 ret = inflate_fixed(&state, &len_code, &dist_code);
665 break;
666 case 2:
667 ret = inflate_dynamic(&state);
668 break;
669 default:
670 ret = EINVAL;
671 }
672 } while ((!last) && (ret == 0));
673
674 return ret;
675}
Note: See TracBrowser for help on using the repository browser.