/*
Program: COMPRESS.C
Author : Kim Moser
Date   : 26 December, 1990
System : IBM PC / Borland Turbo C 2.0
Descrip: Compresses a word list (one word per line, letters 'A' and up)
         to stdout, or decompresses stdin to stdout.
         The format assumes all lines are the same length.

         Output is a stream of unsigned integers, each 5 bits wide,
         packed most-significant-bit first into bytes.

         HEADER:
           INT  MEANING
           0    Low 5 bits of length of words in file
           1    High 5 bits of length of words in file
         BODY (repeated for each word):
           INT  MEANING
           0    Optional: 26, meaning the word was followed by '*'
           1    How many leading chars are same as that of prev string
           2    (0+leading)'th char of string, as (char - 'A')
           ...
           n    (n+leading)'th char of string
         TRAILER:
           31   End of word list

         Ex:
           ABC
           ABD
           ACE
         compresses to:
           HEADER:
             DATA  STANDS FOR
             3     Words are 3 chars long (3)
             0     Words are 3 chars long (3 + 0*(2^5))
           BODY:
             DATA  STANDS FOR
             0     0 leading chars match prev string
             0     'A' (0'th char)
             1     'B' (1'th char)
             2     'C' (2'th char)
             2     2 leading chars match prev string
             3     'D' (2'th char)
             1     1 leading char matches prev string
             2     'C' (1'th char)
             4     'E' (2'th char)

         NOTE: compress() currently writes the header as 0, 0 (the code
         that emitted the real word length is disabled), while
         decompress() treats a width of 0 as end of input.

Usage  : COMPRESS -c [@filelist] <infile >outfile
         COMPRESS -d <infile >outfile
*/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Text/binary stream modes only exist on DOS-family runtimes; elsewhere
   stdin/stdout are already binary and setmode() is not available. */
#if defined(__MSDOS__) || defined(MSDOS) || defined(_MSDOS) || \
    defined(__DOS__) || defined(__TURBOC__) || defined(_WIN32) || \
    defined(__OS2__)
#include <fcntl.h>
#include <io.h>
#define HAVE_BINARY_MODE 1
#endif

#define CODE_BITS   5                   /* Width of one packed integer */
#define CODE_LIMIT  (1 << CODE_BITS)    /* Codes are 0 .. CODE_LIMIT-1 */
#define OUT_FLUSH   (-1)                /* out(): write pending bits */
#define ASTERISK    ('Z'-'A'+1)         /* Code 26: word is marked with '*' */
#define END_OF_LIST 31                  /* Code 31: no more words */

/* Largest shared-prefix count that compress() emits.  It must stay below
   ASTERISK so that getword() cannot mistake a count for a marker. */
#define MAX_SHARED  (ASTERISK - 1)

/* Bitwise & with LEFTMOST[n] to clear all but the leftmost n bits: */
static const unsigned char LEFTMOST[] = {
    (unsigned char) (0),
    (unsigned char) (128),
    (unsigned char) (128+64),
    (unsigned char) (128+64+32),
    (unsigned char) (128+64+32+16),
    (unsigned char) (128+64+32+16+8),
    (unsigned char) (128+64+32+16+8+4),
    (unsigned char) (128+64+32+16+8+4+2),
    (unsigned char) (128+64+32+16+8+4+2+1)
};

/* Line currently being processed, and the one processed before it: */
static char str[512], prev[sizeof(str)];

/* Returns the smaller of 'a' and 'b'. */
static int min_int(int a, int b)
{
    return (a < b) ? a : b;
}

/* Returns the leftmost 'nbits' (0..8) bits of 'ch', right-justified. */
static unsigned char leftmost(unsigned char ch, int nbits)
{
    return (unsigned char) ((ch & LEFTMOST[nbits]) >> (8 - nbits));
}

/*
 * Appends the 5-bit code 'i' (0..31) to the packed output on stdout.
 * Passing OUT_FLUSH (-1) instead writes the partially filled byte,
 * left-justified and zero-padded; a byte is written even when no bits
 * are pending.  Any other value is reported on stderr and the program
 * exits.
 */
static void out(int i)
{
    static unsigned char bits = '\0';   /* Bits waiting to be written */
    static int nbits = 0;               /* How many trailing bits in 'bits'
                                           are valid (when it reaches 8, we
                                           write 'bits' to stdout) */
    unsigned char ch;                   /* 'i' as an unsigned char */
    int shift;

    if (i >= CODE_LIMIT) {
        fprintf(stderr, "out(): i (%d) is too large.\n", i);
        exit(-1);
    }
    if (i < OUT_FLUSH) {
        fprintf(stderr, "out(): i (%d) is negative.\n", i);
        exit(-1);
    }

    if (i == OUT_FLUSH) {
        putchar((unsigned int) (bits << (8 - nbits)));
        bits = '\0';
        nbits = 0;
        return;
    }

    ch = (unsigned char) i;
    ch <<= (8 - CODE_BITS);             /* Left-justify bits within 'ch' */

    /* As many bits of the code as still fit in the current byte: */
    shift = min_int(CODE_BITS, 8 - nbits);
    bits <<= shift;
    bits |= leftmost(ch, shift);
    nbits += shift;

    if (nbits == 8) {
        putchar((unsigned int) bits);
        bits = '\0';
        nbits = 0;
    }

    /* If they didn't all fit, the remaining bits start the next byte: */
    if (shift != CODE_BITS) {
        ch <<= shift;
        bits = leftmost(ch, CODE_BITS - shift);
        nbits += CODE_BITS - shift;
    }
}

/*
 * Compresses every line of 'fp' to stdout via out(): a two-code header
 * (written once a first line has been read), then each word as an
 * optional ASTERISK marker, a shared-prefix count and the remaining
 * characters, then the END_OF_LIST code.
 *
 * The header is written as 0, 0; the real word length is not emitted.
 * A character below 'A' cannot be encoded and terminates the program.
 */
static void compress(FILE *fp)
{
    int i;
    int code;
    size_t len;
    int first = 1;

    prev[0] = '\0';

    while (fgets(str, sizeof(str) - 1, fp) != NULL) {
        /* Remove trailing newline, if any (the last line may lack one): */
        str[strcspn(str, "\n")] = '\0';
        len = strlen(str);

        if (first) {
            out(0);
            out(0);
            first = 0;
        }

        if (len > 0 && str[len - 1] == '*') {
            str[--len] = '\0';
            out(ASTERISK);
        }

        /* Count how many leading chars from str[] match leading chars in
           prev[].  The mismatch test also stops at the end of prev[]. */
        for (i = 0; (i < MAX_SHARED) && str[i] && (str[i] == prev[i]); i++)
            ;
        out(i);

        /* Output rest of str[]: */
        for (; str[i]; i++) {
            code = str[i] - 'A';
            if (code < 0) {
                /* Would be truncated to 5 bits, or (for -1) be taken as
                   a flush request by out(). */
                fprintf(stderr,
                        "compress(): character code %d cannot be encoded.\n",
                        (int) (unsigned char) str[i]);
                exit(-1);
            }
            out(code);
        }

        /* 'str[]' becomes new 'prev[]': */
        strcpy(prev, str);
    }

    out(END_OF_LIST);
}

/*
 * Reads the next 5-bit code from the packed stream on stdin.
 * Returns the code (0..31), or EOF when stdin is exhausted before a
 * complete code could be assembled.
 */
static int in(void)
{
    static int c;                       /* Char read from stdin */
    static unsigned char ch;            /* 'c' as an unsigned char */
    static unsigned char avail = 0;     /* How many leading bits in 'ch'
                                           are still unread */
    int shift;
    unsigned char r = '\0';
    int got = 0;                        /* How many rightmost bits in 'r'
                                           are valid */

    while (got != CODE_BITS) {
        if (!avail) {
            if ((c = getchar()) == EOF) {
                return (c);
            }
            avail = 8;
            ch = (unsigned char) c;
        }
        shift = min_int(avail, CODE_BITS - got);
        r <<= shift;
        r |= leftmost(ch, shift);
        ch <<= shift;
        avail -= shift;
        got += shift;
    }
    return ((int) r);
}

/*
 * Decodes the next word from stdin into 's', which must already hold the
 * previous word (the shared prefix is reused in place) and be
 * NUL-terminated at 'width'.  '*marked' is set to 1 if the word carried
 * the ASTERISK marker, else 0.
 * Returns 's', or NULL at END_OF_LIST or when the input ends early.
 */
static char *getword(char *s, int width, int *marked)
{
    int i;
    int c;

    *marked = 0;    /* Assume */

    i = in();
    if ((i == END_OF_LIST) || (i == EOF)) {
        return (NULL);
    }

    if (i == ASTERISK) {
        *marked = 1;
        if ((i = in()) == EOF) {
            return (NULL);
        }
    }

    /* 'i' is now the number of leading chars kept from the prev word: */
    while (i < width) {
        if ((c = in()) == EOF) {
            return (NULL);
        }
        s[i++] = (char) (c + 'A');
    }
    return (s);
}

/*
 * Decompresses stdin to stdout.  Each section is a two-code word width
 * followed by words up to END_OF_LIST; a width of 0 or end of input
 * stops decompression.  Marked words are printed with a trailing '*'.
 */
static void decompress(void)
{
    int marked;
    int width;
    int hi;

    for (;;) {
        if ((width = in()) == EOF) {
            break;
        }
        if ((hi = in()) == EOF) {
            break;
        }
        width += hi * CODE_LIMIT;
        if (!width) {
            break;
        }
        /* The header can encode up to 1023, which would not fit str[]: */
        if (width >= (int) sizeof(str)) {
            fprintf(stderr, "decompress(): word width (%d) is too large.\n",
                    width);
            exit(-1);
        }

        str[width] = '\0';
        while (getword(str, width, &marked) != NULL) {
            printf("%s%s\n", str, (marked ? "*" : ""));
        }
    }
}

/* Prints the usage message to stderr and exits with status -1. */
static void usage(void)
{
    fputs("COMPRESS v1.0 12/26/1990 Copyright (C) Kim Moser All Rights Reserved\n"
          "Usage: COMPRESS -c [@filelist] | -d\n"
          "(de)compress stdin to stdout\n"
          "(or, if @filelist specified, then use files in filelist).\n",
          stderr);
    exit(-1);
}

enum { MODE_NONE = 0, MODE_COMPRESS = 1, MODE_DECOMPRESS = 2 };

/*
 * Entry point.
 *   -c              compress stdin to stdout
 *   -c @filelist    compress every file named in 'filelist' (one name per
 *                   line) to stdout; the first character of the argument
 *                   is skipped
 *   -d              decompress stdin to stdout
 * Returns 0 on success, non-zero if a listed file could not be opened.
 * Other failures exit with status -1.
 */
int main(int argc, char **argv)
{
    int mode = MODE_NONE;
    int skipped = 0;                /* Set if a listed file failed to open */
    char *filelistname = NULL;
    FILE *filelist = NULL;
    char thefilename[80];
    FILE *thefile;

    if ((argc < 2) || (argv[1][0] != '-')) {
        usage();
    }

    switch (toupper((unsigned char) argv[1][1])) {
    case 'C':
        mode = MODE_COMPRESS;
        break;
    case 'D':
        mode = MODE_DECOMPRESS;
        break;
    default:
        usage();
    }

    if ((mode == MODE_COMPRESS) && (argc == 3)) {
        if (argv[2][0] == '\0') {
            usage();
        }
        filelistname = argv[2] + 1;     /* Skip the leading '@' */
        if ((filelist = fopen(filelistname, "r")) == NULL) {
            fprintf(stderr, "fopen() failed for filelist '%s'.\n",
                    filelistname);
            exit(-1);
        }
    }

#ifdef HAVE_BINARY_MODE
    /* The packed side of the conversion must not be newline-translated.
       Only -1 means failure; some runtimes return the previous mode. */
    if (setmode(fileno((mode == MODE_COMPRESS) ? stdout : stdin),
                O_BINARY) == -1) {
        fprintf(stderr, "setmode() failed.\n");
        exit(-1);
    }
#endif

    if (mode != MODE_COMPRESS) {
        decompress();
        return 0;
    }

    if (filelist == NULL) {
        compress(stdin);
    } else {
        while (fgets(thefilename, sizeof(thefilename) - 1, filelist) != NULL) {
            /* Remove trailing newline, if any: */
            thefilename[strcspn(thefilename, "\n")] = '\0';

            fprintf(stderr, "Compressing '%s'...\n", thefilename);
            if ((thefile = fopen(thefilename, "r")) == NULL) {
                fprintf(stderr, "fopen() failed for file '%s'.\n",
                        thefilename);
                skipped = 1;
                continue;       /* Nothing to compress or close */
            }
            compress(thefile);
            if (fclose(thefile)) {
                fprintf(stderr, "fclose() failed for file '%s'.\n",
                        thefilename);
                exit(-1);
            }
        }
        if (fclose(filelist)) {
            fprintf(stderr, "fclose() failed for file list '%s'.\n",
                    filelistname);
            exit(-1);
        }
    }

    /* Flush 'out' buffer: */
    out(OUT_FLUSH);

    return skipped ? EXIT_FAILURE : EXIT_SUCCESS;
}