#C28.txt, May 4, 2015, Experimental Linguistics
#Help(): prints the list of the main procedures of this file together
#with their calling sequences (two lines of output); returns nothing
Help:=proc() local Msg:
for Msg in [` Freq(DI, AL,K1), TM(DI,AL), BEG(DI,K1) `,
            ` MagS1(DI,MAT),MagS(DI,K1) `] do
 print(Msg):
od:
end:

#ALP: the 26 lower-case letters a,...,z as a list of symbols, in alphabetical
#order; it has the right shape to serve as the AL argument of Freq and TM
ALP:=[a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]:

#Read (and execute) the files named ENGLISH3 and ENGLISH4.
#NOTE(review): presumably these define the English word lists that are
#passed as DI to the procedures below -- confirm against the files themselves.
#The commented-out line is an alternative absolute path for ENGLISH3.
#read `W:\\ENGLISH3.txt`:
read ENGLISH3:
read ENGLISH4:

#Freq(DI,AL,K1): inputs a "dictionary" DI (a list of words, each word
#being a list of letters) in an alphabet AL (a list of letters), and a
#positive integer K1, and outputs a list of size nops(AL) whose i-th
#entry is the probability of letter AL[i] showing up in the K1-th place.
#Only words with at least K1 letters are counted (shorter words have no
#K1-th place), and the probabilities are relative to the number of such
#words. If no word of DI has K1 letters, the output is a list of zeros.
#For example, Freq([[a,b],[a,c],[b,a]],[a,b,c],1); returns [2/3,1/3,0].
Freq:=proc(DI, AL,K1) local DI1, TAB,I1, W, CNT:

#replace each letter by its index in AL, so letters can index TAB
DI1:=subs({ seq( AL[I1]=I1, I1=1..nops(AL))},DI):

for I1 from 1 to nops(AL) do
 TAB[I1]:=0:
od:

#CNT is the number of words that are long enough to have a K1-th letter
CNT:=0:
for W in DI1 do
 if nops(W)>=K1 then
  TAB[W[K1]]:=TAB[W[K1]]+1:
  CNT:=CNT+1:
 fi:
od:

#avoid dividing by zero when DI is empty or all its words are too short
if CNT=0 then
 [seq(0,I1=1..nops(AL))]:
else
 [seq(TAB[I1]/CNT,I1=1..nops(AL))]:
fi:

end:

#TM(DI,AL): inputs a vocabulary list, DI (a list of words, each word
#being a list of letters), and an alphabet AL (a list of letters), and
#outputs the list of lists L such that L[i][j] is the prob. that if
#AL[i] is in the first place then AL[j] is in the second place.
#Only words with at least two letters are counted (shorter words have no
#second place). If no counted word starts with AL[i], the i-th row
#is all zeros (rather than a division by zero).
#For example, TM([[a,b],[a,a],[b,a]],[a,b]); returns [[1/2,1/2],[1,0]].
TM:=proc(DI,AL) local DI1, I1,J1,TAB,ROW,W,NAL:

NAL:=nops(AL):

#replace each letter by its index in AL, so letters can index TAB
DI1:=subs({ seq( AL[I1]=I1, I1=1..NAL)},DI):

#TAB[i,j]: number of counted words starting with AL[i],AL[j]
#ROW[i]:   number of counted words starting with AL[i]
for I1 from 1 to NAL do
 ROW[I1]:=0:
 for J1 from 1 to NAL do
    TAB[I1,J1]:=0:
 od:
od:

for W in DI1 do
 if nops(W)>=2 then
  TAB[W[1],W[2]]:=TAB[W[1],W[2]]+1:
  ROW[W[1]]:=ROW[W[1]]+1:
 fi:
od:

#normalize each row by its own total; leave empty rows as zeros
[seq([ seq( `if`(ROW[I1]=0, 0, TAB[I1,J1]/ROW[I1]),J1=1..NAL)],
I1=1..NAL)]:

end:

#BEG(DI,K1): inputs a list DI of words (each word a list of letters)
#and an integer K1, and outputs the set of the K1-prefixes (the lists
#of the first K1 letters) of those words of DI that have at least K1
#letters. Results are cached (option remember).
#For example, BEG([[a,b,c],[a,b,d],[x]],2); returns {[a,b]}.
BEG:=proc(DI,K1) local W:
option remember:
#words shorter than K1 contribute nothing (NULL) to the set
{seq(`if`(nops(W)>=K1, [op(1..K1,W)], NULL), W in DI)}:
end:

#MagS1(DI,MAT): inputs a vocabulary list DI (a list of words, each word
#a list of letters) and an already constructed matrix MAT (a list of
#rows, each row a list of letters), and outputs the set of all matrices
#obtained from MAT by appending one more row, where
# - the new row is a word of DI with as many letters as the first row of MAT,
# - every column of the extended matrix is the start of some word in DI,
#   i.e. it belongs to BEG(DI,nops(MAT)+1).
MagS1:=proc(DI,MAT) local Found,Row,Ext,Cols,Pref,r,c:

#the allowed columns: all prefixes of length (number of rows of MAT)+1
Pref:=BEG(DI,nops(MAT)+1):

#Found accumulates the successful extensions as an expression sequence
Found:=NULL:

for Row in DI do

 #only words as long as the rows of MAT can serve as a new row
 if nops(Row)<>nops(MAT[1]) then
  next:
 fi:

 Ext:=[op(MAT),Row]:

 #the set of columns of the extended matrix
 Cols:={seq([seq(Ext[r][c],r=1..nops(Ext))],c=1..nops(Ext[1]))}:

 if Cols subset Pref then
  Found:=Found,Ext:
 fi:

od:

{Found}:

end:

#MagS(DI,K1): inputs a vocabulary list DI (a list of words, each word a
#list of letters) and a pos. integer K1, and outputs the set of all
#matrices with K1 rows (lists of K1 rows) built by starting with a
#one-row matrix [W], W in DI, and applying MagS1 K1-1 times. Hence every
#row is a word of DI, all rows have the same length, and (for K1>1)
#every column is the start of some word of DI.
#For K1=1 the output is simply the set of all [W], W in DI.
MagS:=proc(DI,K1) local Cur,W,M:

#all the one-row matrices
Cur:={seq([W], W in DI)}:

#add one row at a time, keeping every valid extension
to K1-1 do
 Cur:={seq(op(MagS1(DI,M)), M in Cur)}:
od:

Cur:

end: