/* Panin's Panic - a program for finding numeric patterns. Author: Brendan D. McKay Australian National University Canberra, ACT 0200, Australia bdm@cs.anu.edu.au http://cs.anu.edu.au/~bdm Needs 64 bit arithmetic See separate documentation. */ #ifdef __alpha typedef long bitvector; #define BIT(i) (1L << (i)) #else #ifdef _WIN32 typedef _int64 bitvector; /* Thanks to Mark Glyshaw. */ #define BIT(i) (1I64 << (i)) #else typedef long long bitvector; #define BIT(i) (1LL << (i)) #endif #endif #define MAXFEATURES 1000 #define NWRITE 0 #define USEPLACEVALS 1 #define USENUMVALS 1 #ifndef APP #define APP 0 /* if nonzero, include apostrophe */ #endif static int numval[] = {1,2,3,4,5,6,7,8,9, 10,20,30,40,50,60,70,80,90, 100,200,300,400,500,600,700,800}; #define NUMVAL(c) ((c) >= 'a' && (c) <= 'z' ? numval[(c)-'a'] : \ (c) >= 'A' && (c) <= 'Z' ? numval[(c)-'A'] : 0) #define ISVOWEL(c) ((c)=='a' || (c)=='e' || (c)=='i' || (c)=='o' || (c)=='u'\ || (c)=='A' || (c)=='E' || (c)=='I' || (c)=='O' || (c)=='U') #define PLACEVAL(c) ((c) >= 'a' && (c) <= 'z' ? (c)-'a'+1 : \ (c) >= 'A' && (c) <= 'Z' ? (c)-'A'+1 : 0) #define MAXC 100000 #define ISLETTER(c) ((c) >= 'a' && (c) <= 'z' || (c) >= 'A' && (c) <= 'Z') #define ISEOS(c) ((c) == '.' || (c) == '!' || (c) == '?') #define ISEVEN(x) (((x)&1) == 0) #define ISODD(x) ((x)&1) #define POPC(x,c) {bitvector pc = x; c = 0; while(pc){++c; pc &= pc-1;}} /* Supported word types. Mark with an appended special character. + common noun ^ proper noun (these two make "noun") # pronoun @ article (a, the, an) ~ verb */ #define ISWTCHAR(c) \ ((c) == '+' || (c) == '^' || (c) == '@' || (c) == '~' || (c) == '#') #define COMMONNOUN 1 #define PROPERNOUN 2 #define ARTICLE 3 #define VERB 4 #define PRONOUN 5 static struct { int c; /* the character */ int icw; /* index of char in word */ int ics; /* index of char in sentence */ int iw; /* index of word */ int iws; /* index of word in sentence */ int is; /* index of sentence */ int val; /* numerical value */ int place; /* place value */ int wordtype; /* word type */ bitvector att; /* boolean attributes */ } data[MAXC]; static int nc,nw,ns; static char* attribute[] = { #define EVENC BIT(0) "ec", #define ODDC BIT(1) "oc", #define EVENCW BIT(2) "ecw", #define ODDCW BIT(3) "ocw", #define FCW BIT(4) "fcw", #define LCW BIT(5) "lcw", #define FLCW BIT(6) "flcw", #define FLWS BIT(7) "flws", #define VOWEL BIT(8) "vowel", #define CONS BIT(9) "cons", #define EVENW BIT(10) "ew", #define ODDW BIT(11) "ow", #define FW BIT(12) "fw", #define LW BIT(13) "lw", #define FWS BIT(14) "fws", #define LWS BIT(15) "lws", #define EVENWS BIT(16) "ews", #define ODDWS BIT(17) "ows", #define EVENCS BIT(18) "ecs", #define ODDCS BIT(19) "ocs", #define EVENS BIT(20) "es", #define ODDS BIT(21) "os", #define FCS BIT(22) "fcs", #define LCS BIT(23) "lcs", #define FLCS BIT(24) "flcs", #define FLW BIT(25) "flw", #define FS BIT(26) "fs", #define LS BIT(27) "ls", #define FLS BIT(28) "fls", #define WSV BIT(29) "wsv", #define WSC BIT(30) "wsc", #define WEV BIT(31) "wev", #define WEC BIT(32) "wec", #define SSV BIT(33) "ssv", #define SSC BIT(34) "ssc", #define SEV BIT(35) "sev", #define SEC BIT(36) "sec", #define OLW BIT(37) "olw", #define ELW BIT(38) "elw", #define OLSW BIT(39) "olsw", #define ELSW BIT(40) "elsw", #define OLSC BIT(41) "olsc", #define ELSC BIT(42) "elsc", #define WCNOUN BIT(43) "cnoun", #define XCNOUN BIT(44) "-cnoun", #define WPNOUN BIT(45) "pnoun", #define XPNOUN BIT(46) "-pnoun", #define WART BIT(47) "art", #define XART BIT(48) "-art", #define WVERB BIT(49) "verb", #define XVERB BIT(50) "-verb", #define WNOUN BIT(51) "noun", #define XNOUN BIT(52) "-noun", #define WPRONOUN BIT(53) "pronoun", #define XPRONOUN BIT(54) "-pronoun", #define SSPN BIT(55) "sspn", #define SSCN BIT(56) "sscn", #define SSN BIT(57) "ssn", #define SSPRO BIT(58) "sspr", #define SSART BIT(59) "ssart", #define SSVB BIT(60) "ssvb" }; /* Don't go beyond BIT(61). */ #define NUMATT (sizeof(attribute)/sizeof(char*)) #define MAXATT BIT(NUMATT) /* List here combinations that are not of interest if they appear all together. Certainly include combinations that guarantee a zero total. Be careful about inclusions, eg FCW|ODDCW because it may prevent an interesting subdivision. */ static bitvector incompat[] = { WCNOUN|XCNOUN, WPNOUN|XPNOUN, WNOUN|XNOUN, WVERB|XVERB, WART|XART, WCNOUN|WPNOUN, WCNOUN|WVERB, WCNOUN|WART, WPNOUN|WVERB, WPNOUN|WART, WVERB|WART, WVERB|WNOUN, WART|WNOUN, WPRONOUN|XPRONOUN, WPRONOUN|WCNOUN, WPRONOUN|WPNOUN, WPRONOUN|WART, WPRONOUN|WVERB, SSCN|SSPN, SSCN|SSVB, SSCN|SSART, SSCN|SSPRO, SSPN|SSVB, SSPN|SSART, SSPN|SSPRO, SSN|SSVB, SSN|SSART, SSN|SSPRO, SSART|SSPRO, SSART|SSVB, SSVB|SSPRO, EVENC|ODDC, EVENCW|ODDCW, EVENCS|ODDCS, EVENW|ODDW, EVENWS|ODDWS, EVENS|ODDS, VOWEL|CONS, FS|LS, /* FS|FLS, LS|FLS, */ FW|LW, /* FW|FLW, LW|FLW, */ /* FCW|LCW, FCW|FLCW, LCW|FLCW, */ FCS|LCS, /* FCS|FLCS, LCS|FLCS, */ /* FCW|ODDCW, */ FCW|EVENCW, FLCW|ODDCW, FLCW|EVENCW, /* FCS|ODDCS, */ FCS|EVENCS, FLCS|ODDCS, FLCS|EVENCS, /* FW|ODDW, */ FW|EVENW, /* FWS|ODDWS, */ FWS|EVENWS, /* FCW|FCS, LCW|LCS, FWS|FS, LWS|LW, */ ELW|OLW, ELSC|OLSC, ELSW|OLSW, ELW|LCW|EVENCW, OLW|LCW|ODDCW, ELSW|LWS|EVENWS, OLSW|LWS|ODDWS, ELSC|LCS|EVENCS, OLSC|LCS|ODDCS}; #define NINCOMPAT (sizeof(incompat)/sizeof(bitvector)) #include /******************************************************************/ static void readfile(f) FILE *f; { int c,ncw,nws,ncs,wt; nc = nw = ns = 0; ncw = nws = ncs = 0; wt = 0; while ((c = getc(f)) != EOF) { if (ISLETTER(c) #if APP || c == '\'' #endif ) { data[nc].c = c; data[nc].iw = nw; data[nc].is = ns; data[nc].icw = ncw; data[nc].iws = nws; data[nc].ics = ncs; data[nc].val = NUMVAL(c); data[nc].place = PLACEVAL(c); data[nc].att = 0; data[nc].wordtype = wt; ++nc; ++ncw; ++ncs; } else if (c == '\'' || c == '-') continue; else if (ISEOS(c)) { if (nc) ++nw; if (nc) ++ns; ncw = nws = ncs = 0; wt = 0; while ((c = getc(f)) != EOF && !ISLETTER(c) && !ISWTCHAR(c)) {} if (c != EOF) ungetc(c,f); } else if (ISWTCHAR(c)) { if (c == '+') wt = COMMONNOUN; else if (c == '^') wt = PROPERNOUN; else if (c == '@') wt = ARTICLE; else if (c == '~') wt = VERB; else if (c == '#') wt = PRONOUN; } else { while ((c = getc(f)) != EOF && !ISLETTER(c) && !ISEOS(c) && !ISWTCHAR(c)) {} if (!ISEOS(c)) { if (nc) ++nw; if (nc) ++nws; ncw = 0; wt = 0; } if (c != EOF) ungetc(c,f); } } } /******************************************************************/ static setattributes() { register int i,j; int wt; bitvector wtmask; #define SET(a) data[i].att |= (a) #define A(a) (data[i].a) #define PA(a) (data[i-1].a) #define NA(a) (data[i+1].a) nw = data[nc-1].iw + 1; ns = data[nc-1].is + 1; for (i = 0; i < nc; ++i) { if (ISVOWEL(A(c))) SET(VOWEL); else SET(CONS); if (ISODD(i)) SET(EVENC); else SET(ODDC); if (ISODD(A(icw))) SET(EVENCW); else SET(ODDCW); if (A(icw) == 0) SET(FCW); if (i == nc-1 || NA(icw) == 0) SET(LCW); if (A(icw) == 0 || i == nc-1 || NA(icw) == 0) SET(FLCW); if (ISODD(A(iw))) SET(EVENW); else SET(ODDW); if (A(iw) == 0) SET(FW); if (A(iw) == nw-1) SET(LW); if (A(iw) == 0 || A(iw) == nw-1) SET(FLW); if (A(is) == 0) SET(FS); if (A(is) == ns-1) SET(LS); if (A(is) == 0 || A(is) == ns-1) SET(FLS); if (A(iws) == 0) {SET(FWS); SET(FLWS);} if (ISODD(A(iws))) SET(EVENWS); else SET(ODDWS); if (ISODD(A(ics))) SET(EVENCS); else SET(ODDCS); if (A(ics) == 0) SET(FCS); if (i == nc-1 || NA(ics) == 0) SET(LCS); if (A(ics) == 0 || i == nc-1 || NA(ics) == 0) SET(FLCS); if (ISODD(A(is))) SET(EVENS); else SET(ODDS); if (A(wordtype) == COMMONNOUN) SET(WCNOUN); else SET(XCNOUN); if (A(wordtype) == PROPERNOUN) SET(WPNOUN); else SET(XPNOUN); if (A(wordtype) == VERB) SET(WVERB); else SET(XVERB); if (A(wordtype) == ARTICLE) SET(WART); else SET(XART); if (A(wordtype) == COMMONNOUN || A(wordtype) == PROPERNOUN) SET(WNOUN); else SET(XNOUN); } for (i = nc; --i >= 0;) { if (A(att) & LCS) { j = A(iw); while (i >= 0 && A(iw) == j) { SET(LWS); SET(FLWS); --i; } ++i; } } for (i = 0; i < nc;) { j = A(iw); if (ISVOWEL(A(c))) while (i < nc && A(iw) == j) { SET(WSV); ++i; } else while (i < nc && A(iw) == j) { SET(WSC); ++i; } } for (i = 0; i < nc;) { j = A(is); if (ISVOWEL(A(c))) while (i < nc && A(is) == j) { SET(SSV); ++i; } else while (i < nc && A(is) == j) { SET(SSC); ++i; } } for (i = nc-1; i >= 0;) { j = A(iw); if (ISVOWEL(A(c))) while (i >= 0 && A(iw) == j) { SET(WEV); --i; } else while (i >= 0 && A(iw) == j) { SET(WEC); --i; } } for (i = nc-1; i >= 0;) { j = A(is); if (ISVOWEL(A(c))) while (i >= 0 && A(is) == j) { SET(SEV); --i; } else while (i >= 0 && A(is) == j) { SET(SEC); --i; } } for (i = nc-1; i >= 0;) { j = A(iw); if (ISODD(A(icw))) while (i >= 0 && A(iw) == j) { SET(ELW); --i; } else while (i >= 0 && A(iw) == j) { SET(OLW); --i; } } for (i = nc-1; i >= 0;) { j = A(is); if (ISODD(A(ics))) while (i >= 0 && A(is) == j) { SET(ELSC); --i; } else while (i >= 0 && A(is) == j) { SET(OLSC); --i; } } for (i = nc-1; i >= 0;) { j = A(is); if (ISODD(A(iws))) while (i >= 0 && A(is) == j) { SET(ELSW); --i; } else while (i >= 0 && A(is) == j) { SET(OLSW); --i; } } for (i = 0; i < nc; ++i) { j = A(is); wt = A(wordtype); if (wt == PROPERNOUN) wtmask = SSPN | SSN; else if (wt == COMMONNOUN) wtmask = SSCN | SSN; else if (wt == ARTICLE) wtmask = SSART; else if (wt == PRONOUN) wtmask = SSPRO; else if (wt == VERB) wtmask = SSVB; else wtmask = 0; while (i < nc && A(is) == j) { SET(wtmask); ++i; } } } /******************************************************************/ static void printatt(att) bitvector att; { bitvector a; int i; if (att == 0) printf(" ALL"); else { for (i = 0, a = BIT(0); a < MAXATT; ++i, a <<= 1) if (att & a) printf(" %s",attribute[i]); } } /******************************************************************/ static bitvector nextatt(a,forced,maxpop) /* first interesting attribute after a */ bitvector a,forced; int maxpop; { register i,k,pop,ok; bitvector x; x = a & (-a); POPC(a,pop); if (pop > maxpop) a += x; else ++a; do { if (a >= MAXATT) return MAXATT; ok = 1; k = 0; for (i = 0; k <= NINCOMPAT; i = (i == NINCOMPAT-1 ? 0 : i + 1)) { if ((a & incompat[i]) == incompat[i]) { x = incompat[i] & (-incompat[i]); /* last bit of ict[i] */ a = (a & ~(x-1)) + x; k = 0; } else ++k; } POPC(a,pop); while (pop > maxpop) { x = a & (-a); a += x; POPC(a,pop); ok = 0; } if ((a & forced) != forced) { a |= forced; ok = 0; } } while (!ok); return a == 0 ? MAXATT : a; } /******************************************************************/ static void count(a,pnum,pval,pplace) /* count chars including given attribute */ bitvector a; int *pnum,*pval,*pplace; { register int i,num,val,place; num = val = place = 0; for (i = 0; i < nc; ++i) if ((data[i].att & a) == a) { ++num; val += data[i].val; place += data[i].place; } *pnum = num; *pval = val; *pplace = place; } /******************************************************************/ static void putnum(n,p) int n,p; { printf("%d",n); if (n % p != 0 || n == p) return; printf("="); while (n % p == 0) { printf("%d",p); n /= p; if (n != 1) printf("*"); } if (n != 1) printf("%d",n); } /******************************************************************/ main(argc,argv) int argc; char *argv[]; { FILE *f; register int i; register bitvector a,pop; int c,p,num,val,place,len; int minlen,maxok,maxext,nfeat,nfatt; int doprompt; bitvector x,fatt[MAXFEATURES],fnum[MAXFEATURES]; bitvector forced; char fcname[200]; if (argc != 2) { fprintf(stderr,"Usage: panin file\n"); exit(2); } if ((f = fopen(argv[1],"r")) == NULL) { fprintf(stderr,"Can't open %s for reading\n",argv[1]); exit(2); } doprompt = isatty(0) && isatty(1); readfile(f); setattributes(); for (i = 0; i < nc; ++i) { if (i >= NWRITE && nc-i >= NWRITE) continue; printf("%c %d/%d/%d %d/%d %d (%d)" , data[i].c,data[i].icw,data[i].ics,i, data[i].iws,data[i].iw,data[i].is,data[i].wordtype); printatt(data[i].att); printf("\n"); } while ((doprompt && printf("p maxok maxext forced = ")), scanf("%d%d%d",&p,&maxok,&maxext) == 3) { forced = 0; for (;;) { while ((c = getchar()) == ' ') {}; if (c == EOF || c == '\n') break; ungetc(c,stdin); scanf("%s",fcname); for (i = 0; i < NUMATT; ++i) if (strcmp(fcname,attribute[i]) == 0) break; if (i == NUMATT) printf("Unknown attribute %s ignored\n",fcname); else forced |= BIT(i); } #ifdef __alpha if (!doprompt) printf("\np = %d; maxok = %d; maxext = %d; forced = %lx\n", p,maxok,maxext,forced); #else if (!doprompt) printf("\np = %d; maxok = %d; maxext = %d; forced = %llx\n", p,maxok,maxext,forced); #endif nfeat = nfatt = 0; POPC(forced,minlen); for (len = minlen; len <= maxok || len <= maxext; ++len) { for (a = forced; a < MAXATT; a = nextatt(a,forced,len)) { POPC(a,pop); if (pop != len) continue; count(a,&num,&val,&place); if (num == 0 || num % p != 0 #if USENUMVALS && val % p != 0 #endif #if USEPLACEVALS && place % p != 0 #endif ) continue; if (len > maxok) { for (i = 0; i < nfatt; ++i) if ((fatt[i] & a) == fatt[i]) { x = a ^ fatt[i]; POPC(x,pop); if (pop == 1) break; } if (i == nfatt) continue; } for (i = 0; i < nfatt; ++i) if ((fatt[i] & a) == fatt[i] && fnum[i] == num) break; if (i < nfatt) continue; if ((len < maxok || len < maxext) && nfatt < MAXFEATURES) { fatt[nfatt] = a; fnum[nfatt] = num; ++nfatt; } ++nfeat; printatt(a); if (num > 0 && num % p == 0) { printf(" #"); putnum(num,p); } #if USENUMVALS if (val > 0 && val % p == 0) { printf(" V"); putnum(val,p); } #endif #if USEPLACEVALS if (place > 0 && place % p == 0) { printf(" P"); putnum(place,p); } #endif printf("\n"); } } printf("%d features found\n",nfeat); } }