* Copyright (C) 1995-2004 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include <linux/zutil.h>
#include "inftrees.h"
#include "inflate.h"
#include "inffast.h"
#ifndef ASMINF
/* Overlay of one 16-bit value with the two bytes that make it up. */
union uu {
	unsigned short us;
	unsigned char b[2];
};

/*
 * Fetch a 16-bit value from @p one byte at a time, so that @p does not
 * have to be 2-byte aligned.  The two bytes are combined in memory order,
 * which gives the same value a direct 16-bit load from @p would produce.
 */
static inline unsigned short
get_unaligned16(const unsigned short *p)
{
	const unsigned char *bytes = (const unsigned char *)p;
	union uu word = { .b = { bytes[0], bytes[1] } };

	return word.us;
}
/*
   Decode literal, length, and distance codes and write out the resulting
   literal and match bytes until either not enough input or output is
   available, an end-of-block is encountered, or a data error is encountered.
   When large enough input and output buffers are supplied to inflate(), for
   example, a 16K input buffer and a 64K output buffer, more than 95% of the
   inflate execution time is spent in this routine.

   Entry assumptions:

        state->mode == LEN
        strm->avail_in >= 6
        strm->avail_out >= 258
        start >= strm->avail_out
        state->bits < 8

   On return, state->mode is one of:

        LEN -- ran out of enough output space or enough available input
        TYPE -- reached end of block code, inflate() to interpret next block
        BAD -- error in block data

   Notes:

    - The maximum input bits used by a length/distance pair is 15 bits for the
      length code, 5 bits for the length extra, 15 bits for the distance code,
      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
      Therefore if strm->avail_in >= 6, then there is enough input to avoid
      checking for available input while decoding.

    - The maximum bytes that a single length/distance pair can output is 258
      bytes, which is the maximum length that can be coded.  inflate_fast()
      requires strm->avail_out >= 258 for each loop to avoid checking for
      output space.

    - @start: inflate()'s starting value for strm->avail_out
 */
/*
 * inflate_fast() - tight decoding loop for literal/length and distance codes.
 * @strm:  stream being inflated.  next_in, next_out, avail_in, avail_out and
 *         the attached inflate state (hold, bits and possibly mode) are read
 *         on entry and written back on return; msg is set on a data error.
 * @start: reference value used to locate the first output byte that may be
 *         copied from directly: beg = next_out - (start - avail_out).
 *
 * The loop keeps going while in < last and out < end, where
 * last = next_in + avail_in - 5 and end = next_out + avail_out - 257, so no
 * per-symbol input or output bounds checks are performed inside the loop.
 * It stops early on an end-of-block code (state->mode = TYPE) or on an
 * invalid code or distance (state->mode = BAD, strm->msg set).
 */
void inflate_fast(z_streamp strm, unsigned start)
{
    struct inflate_state *state;
    const unsigned char *in;    /* local copy of strm->next_in */
    const unsigned char *last;  /* loop continues only while in < last */
    unsigned char *out;         /* local copy of strm->next_out */
    unsigned char *beg;         /* out - (start - avail_out) at entry */
    unsigned char *end;         /* loop continues only while out < end */
#ifdef INFLATE_STRICT
    unsigned dmax;              /* local copy of state->dmax (distance limit) */
#endif
    unsigned wsize;             /* local copy of state->wsize */
    unsigned whave;             /* local copy of state->whave */
    unsigned write;             /* local copy of state->write */
    unsigned char *window;      /* local copy of state->window */
    unsigned long hold;         /* local copy of state->hold (bit accumulator) */
    unsigned bits;              /* local copy of state->bits (bits in hold) */
    code const *lcode;          /* local copy of state->lencode */
    code const *dcode;          /* local copy of state->distcode */
    unsigned lmask;             /* index mask for first-level length lookup */
    unsigned dmask;             /* index mask for first-level distance lookup */
    code this;                  /* table entry currently being processed */
    unsigned op;                /* scratch: code bits, operation, extra bits,
                                   then window offset / byte count to copy */
    unsigned len;               /* match length; reused for unused input bytes */
    unsigned dist;              /* match distance */
    unsigned char *from;        /* source pointer for match copies */

    /* copy state to local variables */
    state = (struct inflate_state *)strm->state;
    in = strm->next_in;
    last = in + (strm->avail_in - 5);
    out = strm->next_out;
    beg = out - (start - strm->avail_out);
    end = out + (strm->avail_out - 257);
#ifdef INFLATE_STRICT
    dmax = state->dmax;
#endif
    wsize = state->wsize;
    whave = state->whave;
    write = state->write;
    window = state->window;
    hold = state->hold;
    bits = state->bits;
    lcode = state->lencode;
    dcode = state->distcode;
    lmask = (1U << state->lenbits) - 1;
    dmask = (1U << state->distbits) - 1;

    /* decode literals and length/distances until end-of-block or not enough
       input data or output space */
    do {
        /* top up the accumulator with two bytes when fewer than 15 bits */
        if (bits < 15) {
            hold += (unsigned long)(*in++) << bits;
            bits += 8;
            hold += (unsigned long)(*in++) << bits;
            bits += 8;
        }
        this = lcode[hold & lmask];
      dolen:
        /* consume the bits used by this code */
        op = (unsigned)(this.bits);
        hold >>= op;
        bits -= op;
        op = (unsigned)(this.op);
        if (op == 0) {                          /* literal */
            *out++ = (unsigned char)(this.val);
        }
        else if (op & 16) {                     /* length base */
            len = (unsigned)(this.val);
            op &= 15;                           /* number of extra bits */
            if (op) {
                if (bits < op) {
                    hold += (unsigned long)(*in++) << bits;
                    bits += 8;
                }
                len += (unsigned)hold & ((1U << op) - 1);
                hold >>= op;
                bits -= op;
            }
            if (bits < 15) {
                hold += (unsigned long)(*in++) << bits;
                bits += 8;
                hold += (unsigned long)(*in++) << bits;
                bits += 8;
            }
            this = dcode[hold & dmask];
          dodist:
            op = (unsigned)(this.bits);
            hold >>= op;
            bits -= op;
            op = (unsigned)(this.op);
            if (op & 16) {                      /* distance base */
                dist = (unsigned)(this.val);
                op &= 15;                       /* number of extra bits */
                if (bits < op) {
                    hold += (unsigned long)(*in++) << bits;
                    bits += 8;
                    if (bits < op) {
                        hold += (unsigned long)(*in++) << bits;
                        bits += 8;
                    }
                }
                dist += (unsigned)hold & ((1U << op) - 1);
#ifdef INFLATE_STRICT
                if (dist > dmax) {
                    strm->msg = (char *)"invalid distance too far back";
                    state->mode = BAD;
                    break;
                }
#endif
                hold >>= op;
                bits -= op;
                op = (unsigned)(out - beg);     /* bytes available before out */
                if (dist > op) {                /* match reaches into window */
                    op = dist - op;             /* distance back in window */
                    if (op > whave) {
                        strm->msg = (char *)"invalid distance too far back";
                        state->mode = BAD;
                        break;
                    }
                    from = window;
                    if (write == 0) {           /* source is at end of window */
                        from += wsize - op;
                        if (op < len) {         /* some from window */
                            len -= op;
                            do {
                                *out++ = *from++;
                            } while (--op);
                            from = out - dist;  /* rest from output */
                        }
                    }
                    else if (write < op) {      /* source wraps around window */
                        from += wsize + write - op;
                        op -= write;
                        if (op < len) {         /* some from end of window */
                            len -= op;
                            do {
                                *out++ = *from++;
                            } while (--op);
                            from = window;
                            if (write < len) {  /* some from start of window */
                                op = write;
                                len -= op;
                                do {
                                    *out++ = *from++;
                                } while (--op);
                                from = out - dist;      /* rest from output */
                            }
                        }
                    }
                    else {                      /* contiguous in window */
                        from += write - op;
                        if (op < len) {         /* some from window */
                            len -= op;
                            do {
                                *out++ = *from++;
                            } while (--op);
                            from = out - dist;  /* rest from output */
                        }
                    }
                    /* copy the remainder three bytes at a time */
                    while (len > 2) {
                        *out++ = *from++;
                        *out++ = *from++;
                        *out++ = *from++;
                        len -= 3;
                    }
                    if (len) {
                        *out++ = *from++;
                        if (len > 1)
                            *out++ = *from++;
                    }
                }
                else {
                    unsigned short *sout;
                    unsigned long loops;

                    from = out - dist;          /* copy direct from output */
                    /*
                     * NOTE(review): the do/while loops below run at least
                     * once, so they rely on len >> 1 being non-zero after
                     * the optional alignment byte, i.e. on len >= 3 here.
                     * Confirm the length tables guarantee that.
                     */
                    /* if out is odd, emit one byte so out is 2-byte aligned */
                    if (!((long)(out - 1) & 1)) {
                        *out++ = *from++;
                        len--;
                    }
                    sout = (unsigned short *)(out);
                    if (dist > 2) {
                        unsigned short *sfrom;

                        /* copy 16 bits at a time; source may be unaligned */
                        sfrom = (unsigned short *)(from);
                        loops = len >> 1;
                        do
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
                            *sout++ = *sfrom++;
#else
                            *sout++ = get_unaligned16(sfrom++);
#endif
                        while (--loops);
                        out = (unsigned char *)sout;
                        from = (unsigned char *)sfrom;
                    } else { /* dist == 1 or dist == 2 */
                        unsigned short pat16;

                        /* pattern is the two bytes just before out */
                        pat16 = *(sout-1);
                        if (dist == 1) {
                            union uu mm;
                            /* replicate the byte preceding out into both
                               halves of the pattern */
                            mm.us = pat16;
                            mm.b[0] = mm.b[1];
                            pat16 = mm.us;
                        }
                        loops = len >> 1;
                        do
                            *sout++ = pat16;
                        while (--loops);
                        out = (unsigned char *)sout;
                    }
                    /* odd length: one trailing byte is still to be copied */
                    if (len & 1)
                        *out++ = *from++;
                }
            }
            else if ((op & 64) == 0) {          /* 2nd level distance code */
                this = dcode[this.val + (hold & ((1U << op) - 1))];
                goto dodist;
            }
            else {
                strm->msg = (char *)"invalid distance code";
                state->mode = BAD;
                break;
            }
        }
        else if ((op & 64) == 0) {              /* 2nd level length code */
            this = lcode[this.val + (hold & ((1U << op) - 1))];
            goto dolen;
        }
        else if (op & 32) {                     /* end-of-block */
            state->mode = TYPE;
            break;
        }
        else {
            strm->msg = (char *)"invalid literal/length code";
            state->mode = BAD;
            break;
        }
    } while (in < last && out < end);

    /* give back whole unused bytes still sitting in the bit accumulator */
    len = bits >> 3;
    in -= len;
    bits -= len << 3;
    hold &= (1U << bits) - 1;

    /* update state and return; the +5 / +257 undo the margins subtracted
       when last and end were computed */
    strm->next_in = in;
    strm->next_out = out;
    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
    strm->avail_out = (unsigned)(out < end ?
                                 257 + (end - out) : 257 - (out - end));
    state->hold = hold;
    state->bits = bits;
    return;
}
/*
   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
   - Using bit fields for code structure
   - Different op definition to avoid & for extra bits (do & for table bits)
   - Three separate decoding do-loops for direct, window, and write == 0
   - Special case for distance > 1 copies to do overlapped load and store copy
   - Explicit branch predictions (based on measured branch probabilities)
   - Deferring match copy and interspersing it with decoding subsequent codes
   - Swapping literal/length else
   - Swapping window/direct else
   - Larger unrolled copy loops (three is about right)
   - Moving len -= 3 statement into middle of loop
 */
#endif