#OK to post homework
#Joseph Koutsoutis, 01-21-2024, Assignment 1

with(StringTools):
#Help(): prints the calling sequences of the procedures defined in this file
#(CCg, getFreqs and printFreqDiffsAboveThresh). Takes no arguments.
Help:=proc()
 print(`CCg(P,A,k), getFreqs(i1,i2), printFreqDiffsAboveThresh(thresh,upperthresh,i1,i2)`);
end proc:

#1

#Here are some examples based on the first
#30 pages of Frank Garvan's awesome Maple booklet.
#Each example unassigns the names it assigned (name := 'name') so that
#later examples start from unassigned symbols again.

#Ditto operator: %% refers to the second-most-recent result, i.e. the
#expression x + 1/2 entered two statements earlier; evalf(...,5) evaluates
#it to 5 digits.
#NOTE(review): presumably the ditto value picks up x = 2/3 here - confirm.
x + 1/2:
x := 2/3:
evalf(%%,5);
x := 'x':

#Factor the polynomial x^10+1.
p := x^10+1:
factor(p);
p := 'p':

#Solve a quadratic, then substitute the first returned root back into the
#left-hand side; this should simplify to 0 if R[1] is indeed a root.
eqn := x^2+x+1=0:
R := solve(eqn,x):
simplify(subs(x=R[1], lhs(eqn)));
eqn := 'eqn': R := 'R':

#Primality test on a 21-digit integer.
isprime(123456789098765432111);

#Ask isolve for integer solutions of a system of two equations in x and a.
eqn1 := x^3+a*x=14: eqn2 := a^2-x=7:
isolve({eqn1, eqn2});
eqn1 := 'eqn1': eqn2 := 'eqn2':

#Ask isolve for integer solutions of a/(b+c)+b/(a+c)+c/(a+b)=4, then
#substitute one specific triple of large integers into the equation.
#NOTE(review): the triple presumably comes from the source cited below and
#is expected to satisfy the equation - confirm against the printed output.
eqn := a/(b+c)+b/(a+c)+c/(a+b)=4: 
isolve(eqn);
subs(
a=154476802108746166441951315019919837485664325669565431700026634898253202035277999,
b=36875131794129999827197811565225474825492979968971970996283137471637224634055579,
c=4373612677928697257861252602371390152816537558161613618621437993378423467772036,
eqn);
eqn := 'eqn':
#source of last example: 
#https://www.quora.com/How-do-you-find-the-positive-integer-solutions-to-frac-x-y+z-+-frac-y-z+x-+-frac-z-x+y-4

#2 done

#3

#CCg(P,A,k):  Implements a generalized Caesar Cipher: every member of P is
#replaced by the member of A that sits k places further along, wrapping
#around cyclically at the end of A.
#INPUTS
#P: a list representing a plaintext
#A: a list representing an alphabet
#k: an integer representing the key (it is reduced modulo nops(A), so it
#   may be negative or larger than the alphabet)
#OUTPUTS
#the encoded plaintext (a list) as long as all members of P are found in A
#(we return FAIL otherwise)
#EXAMPLE
#CCg([h,e,l,l,o," ",W,O,R,L,D],[a,b,...,z,A,B,...,Z," "],4);
#should output [l,i,p,p,s,d," ",S,V,P,H]
CCg:=proc(P,A,k) local T,n,i,c:
n:=nops(A):

#T maps each alphabet member to its image under the shift.
#modp always returns a residue in 0..n-1, even when the session has rebound
#`mod` to mods, so the index modp(...)+1 always lies in 1..n.
for i from 1 to n do
 T[A[i]]:=A[modp(i+k-1,n)+1]:
od:

#Give up as soon as a plaintext member is not part of the alphabet.
for c in P do
 if not assigned(T[c]) then return FAIL: fi:
od:

return [seq(T[c],c in P)]:

end:

#4

#Below this comment, I defined two functions.
#getFreqs is a function that returns T,F as described in the problem,
#and printFreqDiffsAboveThresh tells me if the absolute difference between
#the frequency of a letter in ENGLISH.txt and the frequency of the same letter
#in the j-th position of i-letter words is in a specified range.
#I couldn't come up with a better way of finding differences than this, but here are some observations:
#vowels make up about 37% of all letters but account for 73% of the letters in the 2nd position of 4-letter words
#(for any fixed word length 3 <= i <= 10, vowels account for at least 57% of the letters in the 2nd position of i-letter words)
#s is found much more frequently in the last position of words (its frequency in the overall text is about .09,
#but this increases to 0.4 in the last position of 9-letter words and to around 0.3 in the last position of
#5,7,8,10-letter words)
#i is found more frequently in the third-to-last position of words (its frequency in the overall text is about .08,
#but this increases to about 0.22 in the third-to-last position of 7,8,9,10-letter words)
#n,g are found more frequently in the second-to-last and last positions, respectively, of 7,8,9,10-letter words
#(the frequency increases by at least .05, but g accounts for only 3% of all letters in ENGLISH.txt)
#y is found more frequently at the end of 5,6,7,8,10-letter words (an increase of at least .05 from a base frequency of .017)


#getFreqs(i1,i2)
#INPUTS (both optional)
#i1: an integer representing the smallest word size to consider (default 3)
#i2: an integer representing the largest word size to consider (default 10)
#OUTPUTS
#T: An array (indexed i1..i2) where T[i][j][k] is the frequency of
#the k-th letter in the j-th position of an i-letter word, i.e. the number
#of i-letter words with that letter in position j divided by the number
#of i-letter words
#F: An array where F[k] is the frequency of the k-th letter
#in the text ENGLISH.txt (using words that are i1-i2 letters long)
#NOTES
#If ENG is not yet assigned, the file ENGLISH.txt is read first.
#NOTE(review): assumes ENG() returns a list whose i-th entry is the list of
#i-letter words, each word being a list of lowercase letters a..z - confirm
#against ENGLISH.txt.
#When there are no words of some length i, T[i] is left as all zeros, and
#when no letters are counted at all, F is left as all zeros (instead of
#raising a division-by-zero error).
getFreqs:=proc(i1:=3,i2:=10) 
 local T,F,i,j,k,word,words,alph_to_num_table,idx,cnt,tmp:
 if not assigned(ENG) then read `ENGLISH.txt`: fi:
 words := ENG();

 #Maps "a".."z" to 1..26. The string "a" is used rather than the name a,
 #so the table is built correctly even if a has been assigned a value.
 alph_to_num_table := table([seq(StringTools:-Char(i+StringTools:-Ord("a"))=i+1,i=0..25)]):

 #T[i] holds one 26-entry counter array per position of an i-letter word.
 T := Array(i1..i2):
 for i from i1 to i2 do
  T[i] := Array([seq(Array(1..26),j=1..i)]):
 od:

 F := Array(1..26):

 for i from i1 to i2 do
  cnt := nops(words[i]):

  #Count letters per position (T) and overall (F).
  for word in words[i] do
    for j from 1 to i do
     idx := alph_to_num_table[convert(word[j],string)]:
     T[i][j][idx] += 1:
     F[idx] += 1:
    od:
  od:

  #Turn the positional counts into frequencies; skipped when there are no
  #i-letter words, to avoid dividing by zero.
  if cnt > 0 then
   for j from 1 to i do
    for k from 1 to 26 do
     T[i][j][k] /= cnt:
    od:
   od:
  fi:
 od:

 #Normalize the overall counts by the total number of letters counted.
 tmp := add(F):
 if tmp > 0 then
  for k from 1 to 26 do
   F[k] /= tmp:
  od:
 fi:
 
 return T,F:
end:

#printFreqDiffsAboveThresh(thresh,upperthresh,i1,i2)
#INPUTS (only thresh is required)
#i1,i2 correspond to descriptions in getFreqs (defaults 3 and 10)
#thresh,upperthresh: floats that determine if we print a message (if the
#absolute difference between the frequency of the k-th letter in ENGLISH.txt
#and the frequency of the k-th letter in the j-th position of i-letter words
#is greater than thresh and at most upperthresh, we print a message);
#upperthresh defaults to 2.0
#OUTPUTS
#nothing is returned; one message is printed per (i,j,k) whose difference
#lies in the range
printFreqDiffsAboveThresh:=proc(thresh,upperthresh:=2.0,i1:=3,i2:=10) 
 local T,F,i,j,k,d,num_to_alph_table:

 #Maps 1..26 to "a".."z". The string "a" is used rather than the name a,
 #so the table is built correctly even if a has been assigned a value.
 num_to_alph_table := table([seq(i+1 = StringTools:-Char(i+StringTools:-Ord("a")),i=0..25)]):

 (T,F) := getFreqs(i1,i2):
 for i from i1 to i2 do
  for j from 1 to i do
   for k from 1 to 26 do
    #Absolute gap between overall and positional frequency, computed once.
    d := abs(F[k] - T[i][j][k]):
    if d > thresh and d <= upperthresh then 
     print(cat(sprintf(`The frequency of letter %s in the text is %f, `,
               num_to_alph_table[k], F[k]),
               sprintf(`while the frequency in position %d of %d-letter words is %f. `,
               j, i, T[i][j][k]),
               sprintf(`The difference is %f`,
               d))):
    fi:
   od:
  od:
 od:

 #Explicit NULL so the value of the last assignment to d is not returned.
 return NULL:
end: