/* * LZHUF.C English version 1.0 * Based on Japanese version 29-NOV-1988 * LZSS coded by Haruhiko OKUMURA * Adaptive Huffman Coding coded by Haruyasu YOSHIZAKI * Edited and translated to English by Kenji RIKITAKE */ #include #include #include #include #include #include #include #include /* These values are Turbo-C dependent; EXIT_SUCCESS, EXIT_FAILURE renamed by Kenji */ #define EXIT_OK 0 #define EXIT_FAILED -1 #define getc(infile) memgetc() #define memputc(z) *PutFilePosition++ = z; #define memgetc() ((FilePosition >= EndOfFilePosition) ? ((int) -1) : (UBYTE) *FilePosition++) FILE *infile, *outfile; UBYTE *FilePosition; UBYTE *EndOfFilePosition; UBYTE *PutFilePosition; ULONG textsize = 0, codesize = 0, printcount = 0; void Error(char *message) { printf("\n%s\n", message); exit(EXIT_FAILED); } #define READ_BUFFER_SIZE 8192 #define WRITE_BUFFER_SIZE 8192 /* LZSS Parameters */ #define N 4096 /* Size of string buffer */ #define F 60 /* Size of look-ahead buffer */ #define THRESHOLD 2 #define NIL N /* End of tree's node */ unsigned char *text_buf; short match_position, match_length, *lson, *rson, *dad; void __regargs InsertNode(short r); void __regargs DeleteNode(short p); void __regargs Putcode(short l, USHORT c); void __regargs update(short c); void __regargs EncodeChar(USHORT c); void __regargs EncodePosition(USHORT c); void __regargs Decode(ULONG Size); void InitTree(void) /* Initializing tree */ { register short i; for (i = (N + 1); i <= (N + 256); i++) { rson[i] = NIL; /* root */ } for (i = 0; i < N; i++) { dad[i] = NIL; /* node */ } } void __regargs InsertNode(short r) /* Inserting node to the tree */ { USHORT c; short cmp; register short i; register short p; register UBYTE *key; cmp = 1; key = &text_buf[r]; p = (N + 1) + key[0]; rson[r] = lson[r] = NIL; match_length = 0; for ( ; ; ) { if (cmp >= 0) { if (rson[p] != NIL) { p = rson[p]; } else { rson[p] = r; dad[r] = p; return; } } else { if (lson[p] != NIL) { p = lson[p]; } else { lson[p] = r; dad[r] = p; return; } } for (i = 1; i < F; i++) { if ((cmp = key[i] - text_buf[p + i]) != 0) { break; } } if (i > THRESHOLD) { if (i > match_length) { match_position = ((r - p) & (N - 1)) - 1; if ((match_length = i) >= F) { break; } } if (i == match_length) { if ((c = ((r - p) & (N - 1)) - 1) < match_position) { match_position = c; } } } } dad[r] = dad[p]; lson[r] = lson[p]; rson[r] = rson[p]; dad[lson[p]] = r; dad[rson[p]] = r; if (rson[dad[p]] == p) { rson[dad[p]] = r; } else { lson[dad[p]] = r; } dad[p] = NIL; /* remove p */ } void __regargs DeleteNode(short p) /* Deleting node from the tree */ { register short q; if (dad[p] == NIL) { return; } if (rson[p] == NIL) { q = lson[p]; } else if (lson[p] == NIL) { q = rson[p]; } else { q = lson[p]; if (rson[q] != NIL) { do { q = rson[q]; } while (rson[q] != NIL); rson[dad[q]] = lson[q]; dad[lson[q]] = dad[q]; lson[q] = lson[p]; dad[lson[p]] = q; } rson[q] = rson[p]; dad[rson[p]] = q; } dad[q] = dad[p]; if (rson[dad[p]] == p) { rson[dad[p]] = q; } else { lson[dad[p]] = q; } dad[p] = NIL; } /* Huffman coding parameters */ #define N_CHAR (256 - THRESHOLD + F) /* character code (= 0..N_CHAR-1) */ #define T (N_CHAR * 2 - 1) /* Size of table */ #define R (T - 1) /* root position */ #define MAX_FREQ 0x8000 /* update when cumulative frequency */ /* reaches to this value */ typedef unsigned char uchar; /* * Tables for encoding/decoding upper 6 bits of * sliding dictionary pointer */ /* encoder table */ uchar p_len[64] = { 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 }; uchar p_code[64] = { 0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C, 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC, 0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE, 0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE, 0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF }; /* decoder table */ uchar d_code[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D, 0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F, 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11, 0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13, 0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15, 0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17, 0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B, 0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F, 0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23, 0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27, 0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B, 0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, }; uchar d_len[256] = { 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, }; USHORT freq[T + 1]; /* cumulative freq table */ /* * pointing parent nodes. * area [T..(T + N_CHAR - 1)] are pointers for leaves */ short prnt[(T + N_CHAR)]; /* pointing children nodes (son[], son[] + 1)*/ short son[T]; USHORT getbuf = 0; uchar getlen = 0; short GetBit() /* get one bit */ { register short i; while (getlen <= 8) { if ((i = getc(infile)) < 0) { i = 0; } getbuf |= i << (8 - getlen); getlen += 8; } i = getbuf; getbuf <<= 1; getlen--; return (i < 0); } short GetByte() /* get a byte */ { register USHORT i; while (getlen <= 8) { if ((i = getc(infile)) < 0) { i = 0; } getbuf |= i << (8 - getlen); getlen += 8; } i = getbuf; getbuf <<= 8; getlen -= 8; return (short) (i >> 8); } USHORT putbuf = 0; uchar putlen = 0; void __regargs Putcode(short l, USHORT c) /* output c bits */ { putbuf |= c >> putlen; if ((putlen += l) >= 8) { putc((long) putbuf >> 8, outfile); if ((putlen -= 8) >= 8) { putc((long) putbuf, outfile); /* codesize += 2; */ putlen -= 8; putbuf = c << (l - putlen); } else { putbuf <<= 8; /* codesize++; */ } } } /* initialize freq tree */ void StartHuff() { register short i, j; for (i = 0; i < N_CHAR; i++) { freq[i] = 1; son[i] = (i + T); prnt[(i + T)] = i; } i = 0; j = N_CHAR; while (j <= R) { freq[j] = (freq[i] + freq[(i + 1)]); son[j] = i; prnt[i] = prnt[(i + 1)] = j; i += 2; j++; } freq[T] = 0xffff; prnt[R] = 0; } /* reconstruct freq tree */ void reconst() { register short i, k; short j; USHORT f, l; /* halven cumulative freq for leaf nodes */ j = 0; for (i = 0; i < T; i++) { if (son[i] >= T) { freq[j] = (freq[i] + 1) / 2; son[j] = son[i]; j++; } } /* make a tree : first, connect children nodes */ for (i = 0, j = N_CHAR; j < T; i += 2, j++) { k = i + 1; f = freq[j] = (freq[i] + freq[k]); for (k = j - 1; f < freq[k]; k--); k++; l = (j - k) * 2; /* movmem() is Turbo-C dependent rewritten to memmove() by Kenji */ movmem((void *) &freq[k], (void *) &freq[k + 1], l); freq[k] = f; movmem((void *) &son[k], (void *) &son[k + 1], l); son[k] = i; } /* connect parent nodes */ for (i = 0; i < T; i++) { if ((k = son[i]) >= T) { prnt[k] = i; } else { prnt[k] = prnt[(k + 1)] = i; } } } /* update freq tree */ void __regargs update(short c) { register short i; register short j; short k; short l; if (freq[R] == MAX_FREQ) { reconst(); } c = prnt[(c + T)]; do { k = ++freq[c]; /* swap nodes to keep the tree freq-ordered */ if (k > freq[l = (c + 1)]) { while (k > freq[++l]); l--; freq[c] = freq[l]; freq[l] = k; i = son[c]; prnt[i] = l; if (i < T) prnt[(i + 1)] = l; j = son[l]; son[l] = i; prnt[j] = c; if (j < T) prnt[(j + 1)] = c; son[c] = j; c = l; } c = prnt[c]; } while (c); /* do it until reaching the root */ } void __regargs EncodeChar(USHORT c) { register short k; register USHORT code; short len; code = 0; len = 0; k = prnt[(c + T)]; /* search connections from leaf node to the root */ do { code >>= 1; if (k & 1) { code += 0x8000; } len++; k = prnt[k]; } while (k != R); Putcode(len, code); update(c); } void __regargs EncodePosition(USHORT c) { register USHORT i; /* output upper 6 bits with encoding */ i = c >> 6; Putcode((p_len[i]), (p_code[i] << 8)); /* output lower 6 bits directly */ Putcode(6, ((c & 0x3f) << 10)); } void EncodeEnd() { if (putlen) { putc((long) putbuf >> 8, outfile); /* codesize++; */ } } short DecodeChar() { register USHORT c; c = son[R]; /* * start searching tree from the root to leaves. * choose node #(son[]) if input bit == 0 * else choose #(son[]+1) (input bit == 1) */ while (c < T) { c += GetBit(); c = son[c]; } c -= T; update(c); return (short) c; } short DecodePosition() { register USHORT i, j; USHORT c; /* decode upper 6 bits from given table */ i = GetByte(); c = (d_code[i] << 6); j = d_len[i]; /* input lower 6 bits directly */ j -= 2; while (j--) { i = (i << 1) + GetBit(); } return (short) (c | i & 0x3f); } /* Compression */ #define memgetc() ((LocalFilePosition >= LocalEndOfFilePosition) ? ((int) -1) : (UBYTE) *LocalFilePosition++) Encode(ULONG Length) /* Encoding/Compressing */ { short i; short c; short len; register short r; register short s; register UBYTE *LocalFilePosition; register UBYTE *LocalEndOfFilePosition; short last_match_length; LocalFilePosition = FilePosition; LocalEndOfFilePosition = EndOfFilePosition; InitHuff(); StartHuff(); InitTree(); s = 0; r = (N - F); for (i = s; i < r; i++) { text_buf[i] = ' '; } for (len = 0; len < F && (c = getc(infile)) != EOF; len++) { text_buf[(r + len)] = c; } textsize = len; for (i = 1; i <= F; i++) { InsertNode((r - i)); } InsertNode(r); do { if (match_length > len) { match_length = len; } if (match_length <= THRESHOLD) { match_length = 1; EncodeChar(text_buf[r]); } else { EncodeChar(((255 - THRESHOLD) + match_length)); EncodePosition(match_position); } last_match_length = match_length; for (i = 0; i < last_match_length && (c = getc(infile)) != EOF; i++) { DeleteNode(s); text_buf[s] = c; if (s < (F - 1)) { text_buf[(s + N)] = c; } s = ((s + 1) & (N - 1)); r = ((r + 1) & (N - 1)); InsertNode(r); } while (i++ < last_match_length) { DeleteNode(s); s = (s + 1) & (N - 1); r = (r + 1) & (N - 1); if (--len) { InsertNode(r); } } } while (len > 0); EncodeEnd(); FilePosition = LocalFilePosition; EndOfFilePosition = LocalEndOfFilePosition; } #undef memgetc() #define memputc(z) *LocalPutFilePosition++ = z; void __regargs Decode(ULONG Size) /* Decoding/Uncompressing */ { register short c; register short r; short k; short i, j; ULONG count; register UBYTE *LocalPutFilePosition = PutFilePosition; InitHuff(); /* Sets textsize to zero; be careful! */ textsize = Size; if (textsize == 0) { return; } StartHuff(); for (i = 0; i < (N - F); i++) { text_buf[i] = ' '; } r = (N - F); for (count = 0; count < textsize; ) { c = DecodeChar(); if (c < 256) { memputc(c); text_buf[r++] = c; r &= (N - 1); count++; } else { i = (r - DecodePosition() - 1) & (N - 1); j = c - (255 - THRESHOLD); count += j; for (k = 0; k < j; k++) { c = text_buf[(i + k) & (N - 1)]; memputc(c); text_buf[r++] = c; r &= (N - 1); } } } PutFilePosition = LocalPutFilePosition; } #undef memputc(z) /* int memgetc() { if (FilePosition >= EndOfFilePosition) { printf("EOF"); return ((int) -1); } else { FilePosition++; return (*(FilePosition-1)); } } */ /* memputc(UBYTE Char) { *PutFilePosition++ = Char; } */ InitHuff() { textsize = 0; codesize = 0; printcount = 0; getbuf = 0; getlen = 0; putbuf = 0; putlen = 0; }