/* Program: FAKETEXT.C Author : Kim Moser Date : 6 August, 1990 System : IBM PC / Borland Turbo C 2.0 Descrip: Analyzes text files for frequency of all "windows" of text of a given size, then creates an output file based on the frequencies of all "windows" found. Usage : FAKETEXT -w -i -t [-c] */ #include #include #include #include #include #include typedef struct windowrec { long int window; }; static int WIDTH=0; /* Width of window */ static char *WINDOW1 = NULL, *WINDOW2 = NULL; static long int NUMWINDOWS; static char *INFILENAME=NULL, *TMPFILENAME=NULL; static FILE *fin, *ftmp; static int CONT; /* Whether we should use existing TMP file */ /*************************************************************************/ static void halt(void); static void halt(void) { fcloseall(); exit(-1); } static int seekwindow(long int i); static int seekwindow(long int i) { return (!fseek(fin, i, SEEK_SET)); } static void readwindow(long int i, char *dest); static void readwindow(long int i, char *dest) { #ifdef NOT_DEFINED if ((p = ftell(fin)) == -1L) { fprintf(stderr, "readwindow(): ftell() failed.\n"); halt(); } #endif if (!seekwindow(i)) { fprintf(stderr, "readwindow(): seekwindow() failed (i=%ld)\n", i); halt(); } if (fread(dest, (size_t) WIDTH, (size_t) 1, fin) != 1) { fprintf(stderr, "readwindow(): fread() failed.\n"); halt(); } dest[WIDTH] = '\0'; #ifdef NOT_DEFINED /* Dump window: */ printf("window='"); for (i=0; i -i -o -t [-c]\n", stderr); halt(); } static void parse(char *s); static void parse(char *s) { if ((s[0]=='-') || (s[0]=='/')) { switch (toupper(s[1])) { case 'W': if ((WIDTH = atoi(s+2)) == 0) usage(); break; case 'I': INFILENAME = s+2; break; case 'T': TMPFILENAME = s+2; break; case 'C': CONT = 1; break; default: usage(); } } } static void buildndx(void); static void buildndx(void) { long int i, p=0, prev=100; struct windowrec rec; fprintf(stderr, " %% done%c%c%c%c%c%c%c%c%c", 8,8,8,8,8,8,8,8,8); for (i=0; i 0) { swaps++; /* WINDOW1 > WINDOW2, so swap them: */ if (!writewindowrec(i, &rec2)) { fprintf(stderr, "sortndx(): writewindowrec() failed (i=%ld).\n", i); halt(); } if (!writewindowrec(i+1, &rec1)) { fprintf(stderr, "sortndx(): writewindowrec() failed (i=%ld).\n", i); halt(); } } } fprintf(stderr, " (%9ld swaps) %c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c", swaps, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8); } while (swaps); fputc('\n', stderr); } #ifdef NOT_DEFINED static void packndx(void); static void packndx(void) { struct windowrec wrec; for (i=0; readwindowrec(i, &wrec); i++) { if (!wrec.count) { /* This record is empty, so find next non-empty record and start copying from it to here, until EOF found: */ for (j=i+1; } } } #endif static long int countwindow(long int index, long int *first); static long int countwindow(long int index, long int *first) /* Given the index of a windowrec, count how many windows start with the text in WINDOW1. Set 'first' to the index of the windowrec of the first one, and return the count. */ { long int i; struct windowrec wr; long int count=0; fprintf(stderr, "countwindow(): index=%ld\n", index); *first = index; /* Assume */ /* Look backwards: */ for (i=index; i >= 0; i--) { if (!readwindowrec(i, &wr)) { fprintf(stderr, "countwindow(): readwindowrec() failed for %d'th window.\n", i); halt(); } readwindow(wr.window, WINDOW2); fprintf(stderr, "Checking if '%s' begins with '%s'\n", WINDOW2, WINDOW1); if (strstr(WINDOW2, WINDOW1) == WINDOW2) { count++; *first = i; } else { break; } } /* Look forwards: */ for (i=index+1; i < NUMWINDOWS; i++) { if (!readwindowrec(i, &wr)) { fprintf(stderr, "countwindow(): readwindowrec() failed for %d'th window.\n", i); halt(); } readwindow(wr.window, WINDOW2); fprintf(stderr, "Checking if '%s' begins with '%s'\n", WINDOW2, WINDOW1); if (strstr(WINDOW2, WINDOW1) == WINDOW2) { count++; } else { break; } } return (count); } static void writefake(void); static void writefake(void) { long int i, count, first; struct windowrec wr; randomize(); /* Pick a window and output it: */ if (!readwindowrec(i = (long int)random((int)NUMWINDOWS), &wr)) { fprintf(stderr, "writefake(): readwindowrec() failed for %ld'th windowrec.\n", i); halt(); } readwindow(wr.window, WINDOW1); printf("%s", WINDOW1); while (1) { /* Throw away first char of window: */ memmove(WINDOW1, WINDOW1 + 1, strlen(WINDOW1) + 1); /* Count how many windows begin with this window: */ count = countwindow(i, &first); printf("count=%ld\n", count); exit(-1); /* Perform weighted random selection: */ i = (long int) random(((int)count)) + first; /* Read new window: */ if (!readwindowrec(i, &wr)) { fprintf(stderr, "writefake(): readwindowrec() failed for %ld'th windowrec.\n", i); halt(); } readwindow(wr.window, WINDOW1); /* Output last character of window: */ printf("%c", WINDOW1[strlen(WINDOW1)-1]); } #if 0 long int i, j, r; struct windowrec wrec; randomize(); for (i=0; i<(NUMWINDOWS/WIDTH); i++) { r = (long int) random((int) NUMWINDOWS); #ifdef NOT_DEFINED for (j=0; 1; j++) { if (!readwindowrec(j, &wrec)) { fprintf(stderr, "writefake(): readwindowrec() failed (j=%ld, r=%ld).\n", j, r); halt(); } if (wrec.count >= r) { break; } else { r -= wrec.count; } } #endif readwindow(j=r); for (j=0; j