# Work on Project
# NUMBER 2:
read("/Users/kw/Documents/English.txt"):
read("/Users/kw/Documents/finalcurrent427.txt");

# Here is the transition matrix proc from our final project.
# We modify it to give counts rather than probabilities.
#_________________________________________________________
#
# MakeTrM(k1, stSize)
#   Builds a 28x28 table of letter-to-letter transition COUNTS over the word
#   list supplied by ENG().
#
#   Index layout (both rows and columns):
#       1      -> "start" pseudo-state
#       2..27  -> letter codes 1..26 shifted by one
#                 (presumably a..z as produced by ConvertInd -- confirm there)
#       28     -> "end" pseudo-state
#
#   Parameters:
#       k1     - if 0 or 1, every word returned by ENG() is used; otherwise
#                the word list is first passed through ModifMtx(k1, ENG())
#                (presumably a minimum-length filter -- see its definition).
#       stSize - state size; only stSize = 1 is implemented here (default 1).
#
#   Returns:
#       For stSize = 1, a list of 28 lists of 28 non-negative integers where
#       entry [r][c] is the number of times state r is immediately followed
#       by state c.  For any other stSize no matrix is produced.
#
#   Notes:
#       * ENG, ModifMtx and ConvertInd come from the files read above.
#       * Every word is assumed to be non-empty (E[i][1] is read
#         unconditionally).
MakeTrM := proc(k1, stSize := 1)
    local E, L, n, m, i, j, a, b, r, c, Counts:
    option remember:

    # Select the word list.
    if k1 = 0 or k1 = 1 then
        E := [seq(op(L), L in ENG())]:
    else
        E := ModifMtx(k1, ENG()):
        #E:=TruncMtx(k1, [seq(op(s),s in ENG())]):
    fi:

    # Convert all words into number lists. This is done to preserve data and
    # for convenience.
    E := ConvertInd(E):
    n := nops(E):    # number of words

    if stSize = 1 then
        # Mutable 28x28 accumulator, zero-initialised.
        Counts := Matrix(28, 28):

        for i from 1 to n do
            m := nops(E[i]):    # word length

            # Row 1 is dedicated to "start": start -> first letter.
            b := E[i][1] + 1:
            Counts[1, b] := Counts[1, b] + 1:

            # Transition from the j-th to the (j+1)-th letter of word i.
            # Letter code k lives at index k+1 because index 1 is "start".
            for j from 1 to m - 1 do
                a := E[i][j] + 1:
                b := E[i][j + 1] + 1:
                Counts[a, b] := Counts[a, b] + 1:
            od:

            # Column 28 is dedicated to "end": last letter -> end.
            a := E[i][m] + 1:
            Counts[a, 28] := Counts[a, 28] + 1:
        od:

        # Hand back an immutable list of lists (same shape as before) so the
        # remember table never stores a mutable object.
        return [seq([seq(Counts[r, c], c = 1 .. 28)], r = 1 .. 28)]:
    fi:
end:

##
#__________________________________________
# END GROUP CODE
#________________________________

# MakeTrM(1) takes the k1 = 1 branch, i.e. the word list from ENG() without
# the ModifMtx step.  Letter pairs can only come from words of length at
# least 2.
# MakeTrM returns a list of lists; LinearAlgebra's DeleteRow/DeleteColumn need
# a Matrix, so convert first.
M := Matrix(MakeTrM(1)):

# CLEAN OFF START AND END
# Remove index 1 ("start") and index 28 ("end") from both rows and columns in
# a single call each, so the indices are not shifted by an earlier deletion.
# What remains is the 26x26 letter-to-letter block.
with(LinearAlgebra):
M := DeleteRow(M, [1, 28]):
M := DeleteColumn(M, [1, 28]):

# Want to find the number of nonzero entries.
# Want to find the number of entries which are greater than 10.
countnonzero := 0:
countbig10 := 0:
for i from 1 to 26 do
    for j from 1 to 26 do
        if M[i, j] <> 0 then
            countnonzero := countnonzero + 1:
        fi:
        # Strictly greater than 10, matching the printed message.
        if M[i, j] > 10 then
            countbig10 := countbig10 + 1:
        fi:
    od:
od:
print(`the number of pairs which appear is`, countnonzero):
print(`the number of pairs which appear more than 10 times is`, countbig10):

# We get that 570 pairs appear, and 491 appear more than 10 times.
# NOTE(review): these figures were recorded from the earlier version of this
# script (which trimmed the wrong row and used a ">9" threshold); re-run and
# update them.

# Challenge #3: This can be done with code that I have seen that my teammates
# wrote, but I don't want to claim it as my own for credit here.