#C26.txt April 29, 2019
#Help(): prints the list of the procedures defined in this file
Help:=proc() local msg:
 msg:=` FT(VOC,A), ENT(L), TRM(VOC,A,I1,J1), CO(ALPH), SCORE(W,VOC,A) , INID(VOC,A)`:
 print(msg):
end:

read `EV5.txt`:
read `EV3.txt`:

#CO(ALPH): the code table of the alphabet ALPH
#Input: ALPH, a list of letters.
#Output: a table sending each letter ALPH[i] to its position i in ALPH
#(if a letter is repeated, its last position is the one recorded).
#For an empty alphabet an empty table is returned.
#Added after class, May 1, 2019
CO:=proc(ALPH) local T1,I1:

# Create the table explicitly, so that an empty ALPH still yields a table
# rather than the unassigned local name T1.
T1:=table():

for I1 from 1 to nops(ALPH) do
 T1[ALPH[I1]]:=I1:
od:

# eval is needed to return the table itself (tables use last-name evaluation)
eval(T1):
end:


#FT(VOC,A): inputs a vocabulary list VOC (a list of lists) and an alphabet A
#(a list of letters) and outputs the frequency table of the letters of A,
#i.e. the list whose i-th entry is the number of occurrences of A[i] in VOC
#divided by the total number of occurrences of all the letters of A.
#Letters that show up in VOC but are not in A are ignored.
FT:=proc(VOC,A) local letters,cnt,k,w,freq:
# all the letters of all the words, in one flat list
letters:=ListTools[Flatten](VOC):

# one counter per letter of the alphabet, starting at 0
for k from 1 to nops(A) do
 cnt[A[k]]:=0:
od:

# tally the letters that belong to the alphabet
for w in letters do
 if member(w,A) then
  cnt[w]:=cnt[w]+1:
 fi:
od:

freq:=[seq(cnt[A[k]],k=1..nops(A))]:

# normalize the counts so that they add up to 1
freq/convert(freq,`+`):

end:

#ENT(L): the entropy (in bits) of the list of probabilities L, i.e.
#-(sum of p*log[2](p) over the entries p of L).
#Entries equal to 0 contribute 0 (the usual convention 0*log(0)=0).
#Evaluating log[2](0) directly raises an error, and frequency lists such as
#those output by FT and INID routinely contain zeros.
ENT:=proc(L) local p,S:
S:=0:
for p in L do
 # skip zero probabilities: they contribute nothing to the entropy
 if p<>0 then
  S:=S+p*log[2](p):
 fi:
od:
-S:
end:

#TRM(VOC,A,I1,J1): inputs a vocabulary VOC, an alphabet A, and pos. integers I1, J1
#outputs the nops(A) by nops(A) matrix (as a list of rows) whose i,j entry is
#the conditional probability
#Letter[J1]=A[j] | Letter[I1]=A[i]
#estimated from the words of VOC.
#If no word of VOC has A[i] in position I1, the i-th row is the uniform
#distribution (corrected May 1, 2019 to avoid division by 0)
TRM:=proc(VOC,A,I1,J1) local rowCount,pairCount,n,r,c,w:
option remember:
n:=nops(A):

# rowCount[x]: number of words with x in position I1
# pairCount[x,y]: number of words with x in position I1 and y in position J1
for r from 1 to n do
 rowCount[A[r]]:=0:
 for c from 1 to n do
  pairCount[A[r],A[c]]:=0:
 od:
od:

for w in VOC do
 rowCount[w[I1]]:=rowCount[w[I1]]+1:
 pairCount[w[I1],w[J1]]:=pairCount[w[I1],w[J1]]+1:
od:

# `if` only evaluates the branch that is selected, so there is no division
# by rowCount[A[r]] when it is 0
[seq(
  `if`(rowCount[A[r]]=0,
       [(1/n)$n],
       [seq(pairCount[A[r],A[c]]/rowCount[A[r]],c=1..n)]),
  r=1..n)]:
end:

#Added after class
#INID(VOC,A): the probability distribution of the first letters of the words
#of the vocabulary VOC over the alphabet A, i.e. the list whose i-th entry is
#the fraction of words (among those starting with a letter of A) that start with A[i]
INID:=proc(VOC,A) local cnt,k,w,lead,freq:
# one counter per letter of the alphabet, starting at 0
for k from 1 to nops(A) do
 cnt[A[k]]:=0:
od:

# tally the first letter of every word, ignoring letters outside A
for w in VOC do
 lead:=w[1]:
 if member(lead,A) then
  cnt[lead]:=cnt[lead]+1:
 fi:
od:

freq:=[seq(cnt[A[k]],k=1..nops(A))]:

# normalize the counts so that they add up to 1
freq/convert(freq,`+`):
end:


#MakeWS(VOC1,VOC2): creates a random K1 by K2 matrix each of whose rows
#belongs to VOC1 and each of whose columns belongs to VOC2,
#where K2=nops(VOC1[1]) (the length of a row) and
#K1=nops(VOC2[1]) (the length of a column)
#MakeWS:=proc(VOC1,VOC2) local K1,K2:
#print(`To be continued in homework `):
#end:


#SCORE(W,VOC,A): the score of the word W with respect to the vocabulary VOC and alphabet A:
#the probability (from INID) of the first letter of W, times the product over
#consecutive positions k, k+1 of the transition probability (from TRM(VOC,A,k,k+1))
#of going from letter W[k] to letter W[k+1]
SCORE:=proc(W,VOC,A) local code,pos,k,prob:
 code:=CO(A):
 # positions in the alphabet A of the letters of W
 pos:=[seq(code[W[k]],k=1..nops(W))]:
 # probability of the first letter
 prob:=INID(VOC,A)[pos[1]]:
 # multiply by the transition probabilities between consecutive letters
 for k from 1 to nops(W)-1 do
  prob:=prob*TRM(VOC,A,k,k+1)[pos[k]][pos[k+1]]:
 od:
 prob:
end: