/* Program: TXTINFO.C Author : Kim Moser Date : March 17, 1991 System : IBM PC / Borland Turbo C 2.0 Descrip: For any text file piped from stdin, tells: Number of lines Average line length Longest line Shortest line Number of words Average word length Longest word Shortest word */ #include #include #include #include #include typedef unsigned long ulong; static ulong grandlinecount=0; static int FILESPROCESSED=0; /* How many files processed */ static void dofile(FILE *fp); static void dofile(FILE *fp) { int ch; int inword=0; ulong linecount=0, longestlinelen=0, shortestlinelen=-1, totallinelen=0, wordcount=0, longestwordlen=0, shortestwordlen=-1, totalwordlen=0, minwordsonline=-1, maxwordsonline=0; ulong thislinelen=0, thiswordlen=0, wordsonthisline=0; /* File pointers: */ long longestline=-1, shortestline=-1, longestword=-1, shortestword=-1, linestart=0, wordstart=-1; FILESPROCESSED++; while (1) { ch = getc(fp); if ((ch == '\t') || (ch == '\n') || (ch == ' ') || (ch == EOF)) { if (inword) { wordsonthisline++; } if (ch == '\n' || ((ch==EOF) && thislinelen)) { linecount++; if (!(linecount % 100)) { fprintf(stderr, "%lu lines so far\r", linecount); } if ((thislinelen < shortestlinelen) || (shortestlinelen == -1)) { shortestlinelen = thislinelen; shortestline = linestart; } if (thislinelen > longestlinelen) { longestlinelen = thislinelen; longestline = linestart; } totallinelen += thislinelen; thislinelen = 0; if (wordsonthisline > maxwordsonline) maxwordsonline = wordsonthisline; if ((wordsonthisline < minwordsonline) || (minwordsonline==-1)) minwordsonline = wordsonthisline; wordsonthisline = 0; /* Remember where this line begins: */ if ((linestart = ftell(fp)) == -1L) { fprintf(stderr, "ftell() failed for linestart.\n"); } } else { thislinelen++; } if (inword) { wordcount++; if ((thiswordlen < shortestwordlen) || (shortestwordlen == -1)) { shortestwordlen = thiswordlen; shortestword = wordstart; } if (thiswordlen > longestwordlen) { longestwordlen = thiswordlen; longestword = wordstart; } totalwordlen += thiswordlen; thiswordlen = 0; inword = 0; } } else { if (!inword) { if ((wordstart = ftell(fp)) == -1L) { fprintf(stderr, "ftell() failed for wordstart.\n"); } if (wordstart) wordstart--; } inword = 1; thislinelen++; thiswordlen++; } if (ch == EOF) break; } grandlinecount += linecount; fprintf(stderr, "%lu lines (min len=%lu, max len=%lu, avg len=%f)\n", linecount, shortestlinelen, longestlinelen, ((float) totallinelen) / ((float) linecount)); if (shortestline != -1) { fprintf(stderr, "shortest line = '"); if (fseek(fp, shortestline, SEEK_SET)) { fprintf(stderr, "fseek() failed for shortest line offset (%ld).\n", shortestline); } else { while ((ch = getc(fp)) != '\n') { if (ch == EOF) break; fputc(ch, stderr); } fprintf(stderr, "'\n"); } } if (longestline != -1) { fprintf(stderr, "longest line = '"); if (fseek(fp, longestline, SEEK_SET)) { fprintf(stderr, "fseek() failed for longest line offset (%ld).\n", longestline); } else { while ((ch = getc(fp)) != '\n') { if (ch == EOF) break; fputc(ch, stderr); } fprintf(stderr, "'\n"); } } fprintf(stderr, "\n%lu words (min len=%lu, max len=%lu, avg len=%f)\n", wordcount, shortestwordlen, longestwordlen, ((float) totalwordlen)/((float) wordcount)); if (shortestword != -1) { fprintf(stderr, "shortest word = '"); if (fseek(fp, shortestword, SEEK_SET)) { fprintf(stderr, "fseek() failed for shortest word offset (%ld).\n", shortestword); } else { while ((ch = getc(fp)) != EOF) { if (ch == '\n' || ch == '\t' || ch == ' ') break; fputc(ch, stderr); } fprintf(stderr, "'\n"); } } if (longestword != -1) { fprintf(stderr, "longest word = '"); if (fseek(fp, longestword, SEEK_SET)) { fprintf(stderr, "fseek() failed for longest word offset (%ld).\n", longestword); } else { while ((ch = getc(fp)) != EOF) { if (ch == '\n' || ch == '\t' || ch == ' ') break; fputc(ch, stderr); } fprintf(stderr, "'\n"); } } fprintf(stderr, "\nmin words on line: %lu\n", minwordsonline); fprintf(stderr, "max words on line: %lu\n", maxwordsonline); fprintf(stderr, "avg words on line: %f\n", ((float) wordcount) / ((float) linecount)); fprintf(stderr, "\n"); } void expand_and_do(char *s); void expand_and_do(char *s) { int first=1; FILE *fp; struct ffblk info; static char *fname = NULL; int i; while (first ? !findfirst(s, &info, ~0) : !findnext(&info)) { first = 0; /* If it's not (a directory named "." or "..") then print it: */ if (!(info.ff_attrib & FA_DIREC) && !(info.ff_attrib & FA_LABEL)) { /* It's not a directory or a label... */ if (fname != NULL) free(fname); if ((fname = (char*) malloc(strlen(s)+12)) == NULL) { fprintf(stderr, "malloc() failed for filename.\n"); exit(-1); } strcpy(fname, s); /* Add info.ff_name in appropriate spot: */ for (i=strlen(fname)-1; i>=0; i--) { if (fname[i] == '\\' || fname[i] == ':') break; } strcpy(fname+i+1, info.ff_name); fprintf(stderr, "%s\n", fname); if ((fp = fopen(fname, "r")) == NULL) { fprintf(stderr, "fopen() failed for '%s'.\n", info.ff_name); exit(-1); } dofile(fp); if (fclose(fp)) { fprintf(stderr, "fclose() failed for '%s'.\n", info.ff_name); exit(-1); } } } } void main(int argc, char **argv) { int i; if (argc == 1) { fputs("Reading from stdin...\n", stderr); dofile(stdin); } else { for (i=1; i