#C28.txt, May 4, 2015, Experimental Linguistics

#Help(): prints the names and calling sequences of the
#main procedures of this file
Help:=proc()
 print(` Freq(DI, AL,K1), TM(DI,AL), BEG(DI,K1) `):
 print(` MagS1(DI,MAT),MagS(DI,K1) `):
end:

#ALP: the lower-case Latin alphabet, as a list of 26 symbols
ALP:=[a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]:

#read `W:\\ENGLISH3.txt`:
#The two files below are read from the current directory; their
#contents are not shown here (presumably word lists -- confirm).
read ENGLISH3:
read ENGLISH4:

#Freq(DI,AL,K1): inputs a "dictionary" DI (a list of words, each
#word being a list of letters) in an alphabet AL, and a positive
#integer K1, and outputs a list of size nops(AL) whose i-th entry
#is the probability of letter AL[i] showing up in the K1-th place.
#Words with fewer than K1 letters have no K1-th place; they are
#skipped and do not count towards the total. If no word has a
#K1-th place, the output is the all-zero list.
Freq:=proc(DI, AL,K1) local DI1, TAB, I1, W, TOT:

#Replace every letter by its position in AL, so that letters
#can be used as table indices
 DI1:=subs({ seq( AL[I1]=I1, I1=1..nops(AL))},DI):

 for I1 from 1 to nops(AL) do
  TAB[I1]:=0:
 od:

#TOT counts the words that actually have a K1-th letter
 TOT:=0:

 for W in DI1 do
  if nops(W)>=K1 then
   TAB[W[K1]]:=TAB[W[K1]]+1:
   TOT:=TOT+1:
  fi:
 od:

#Nothing was counted: avoid dividing by zero
 if TOT=0 then
  RETURN([0$nops(AL)]):
 fi:

 [seq(TAB[I1]/TOT, I1=1..nops(AL))]:

end:

#TM(DI,AL): inputs a vocabulary list, DI, and an alphabet AL
#outputs the list of lists L such that L[i][j] is
#the prob. that if AL[i] is in the first place
#then AL[j] is in the second place.
#Words with fewer than two letters are skipped. If no word
#starts with AL[i], then the row L[i] is all zeros.
TM:=proc(DI,AL) local DI1, I1, J1, TAB, ROW, W:

#Replace every letter by its position in AL
 DI1:=subs({ seq( AL[I1]=I1, I1=1..nops(AL))},DI):

#TAB[i,j]: number of words starting with AL[i],AL[j]
#ROW[i]: number of (counted) words starting with AL[i]
 for I1 from 1 to nops(AL) do
  ROW[I1]:=0:
  for J1 from 1 to nops(AL) do
   TAB[I1,J1]:=0:
  od:
 od:

 for W in DI1 do
  if nops(W)>=2 then
   TAB[W[1],W[2]]:=TAB[W[1],W[2]]+1:
   ROW[W[1]]:=ROW[W[1]]+1:
  fi:
 od:

#`if` only evaluates the selected branch, so a letter that never
#starts a word gives a row of zeros rather than a division by zero
 [seq([seq(`if`(ROW[I1]=0, 0, TAB[I1,J1]/ROW[I1]), J1=1..nops(AL))],
      I1=1..nops(AL))]:

end:

#BEG(DI,K1): inputs a list DI of words and outputs
#the set of their K1-prefixes (words with fewer than
#K1 letters contribute nothing). Results are cached
#via option remember.
BEG:=proc(DI,K1) local W:
option remember:

 {seq(`if`(nops(W)>=K1, [op(1..K1,W)], NULL), W in DI)}:

end:

#MagS1(DI,MAT): inputs a vocabulary list DI,
#and an already constructed matrix, MAT such
#that all the columns are STARTS of words in the voc.
#and finds all extensions with one more row such that
#each column is also a start of a word in DI.
#The new row must be a word of DI of the same length as the
#rows of MAT. Outputs the set of all such extended matrices
#(each matrix is a list of rows).
MagS1:=proc(DI,MAT) local S1, W, MAT1, Hope, I2, J2:

 S1:={}:

#Hope: the set of prefixes of length (number of rows of MAT)+1,
#i.e. the admissible columns of an extended matrix
 Hope:=BEG(DI,nops(MAT)+1):

 for W in DI do
  if nops(W)=nops(MAT[1]) then
   MAT1:=[op(MAT),W]:

#Collect the columns of MAT1 and keep MAT1 only if every
#column is the start of some word
   if {seq([seq(MAT1[I2][J2],I2=1..nops(MAT1))],J2=1..nops(MAT1[1]))}
       subset Hope then
    S1:=S1 union {MAT1}:
   fi:

  fi:
 od:

 S1:

end:

#MagS(DI,K1): inputs a vocabulary list DI and a pos. integer K1
#and outputs the set of all matrices with K1 rows (each matrix
#is a list of rows) such that each row is a member of DI, all
#rows have the same length, and each column is a starter
#(K1-prefix) of a member of DI.
MagS:=proc(DI,K1) local I1, J1, S1, MAT, W, Hope1:

#One-row matrices: keep only the words all of whose letters can
#start a word. For K1>=2 this does not change the output (such a
#row could never be extended by MagS1), for K1=1 it enforces
#the column condition.
 Hope1:=BEG(DI,1):
 S1:={}:

 for W in DI do
  if {seq([W[J1]],J1=1..nops(W))} subset Hope1 then
   S1:=S1 union {[W]}:
  fi:
 od:

#Add one row at a time until there are K1 rows
 for I1 from 1 to K1-1 do
  S1:={seq(op(MagS1(DI,MAT)),MAT in S1)}:
 od:

 S1:

end: