######################################################################
## EM19Proj1.txt Save this file as   EM19Proj1.txt to use it,        #
# stay in the                                                        #
## same directory, get into Maple (by typing: maple <Enter> )        #
## and then type:  read   EM19Proj1.txt: <Enter>                     #
## Then follow the instructions given there                          #
##                                                                   #
## Written by students of Dr. Z.'s Math 640 , Spring 2019, class     #
##  Coordinated by Yukun Yao, yao@math.rutgers.edu                   #
#####################################################################

# Load the CurveFitting package.
# NOTE(review): nothing in the visible part of this file appears to call a
# CurveFitting routine -- confirm before removing.
with(CurveFitting):

# Read the data file from the current directory.
# Presumably this defines the data set MR used in the examples below --
# verify against DATA.txt.
read `DATA.txt`:

# Banner printed every time this file is read.
print(`This is EM19Proj1.txt, a final project for Dr. Z.'s Math 640, Spring 2019, class, Math 640, written by`):
print(`Yukun Yao (coordinator), Victoria Chayes, .....,   `):

print(`To analyze the relationship between Factulty accoplishment and rewards (rank and salary), and among them`):

#Help(): prints the list of procedures in this package.
#Help(ProcName): prints a short description and a sample call for ProcName.
#Any other call prints a "no such thing" message. Nothing is returned; the
#procedure only prints.
Help:=proc()

# nargs=0 is the safe test for "called with no arguments"; it stays a plain
# boolean no matter how many arguments are passed.
if nargs=0 then

print(` EM19Proj1.txt: A Maple package for  Analyzing Data in Maple given in terms of an ABT `):
print(`with the format [FirstName,LastName,Featues , ... ]`):
print(`The MAIN procedures are: Cor, ExtractFields, SubLists `):
print(`The procedures are: AveAndSD, Percentile, Histogram, Covar `):
print(`   `):


elif nargs=1 and args[1]=Cor then
print(`Cor(L): inputs a list of pairs [P,S] outputs [[ave1,sd1],[ave2,sd2],correlation]. Try`):
print(`Cor(ExtractFields(MR,[4,7])); `):

elif nargs=1 and args[1]=ExtractFields then
print(`ExtractFields(L,M): inputs a list of lists L and a list M , produces a list of sublists with only the features given by M in that order`):
print(`For example, try:`):
print(`ExtractFields(MR,[7,1,2]);`):

elif nargs=1 and args[1]=SubLists then
print(`SubLists(L,I1,J1): given a list L of lists where in each item, the I1-th list describes a categorical featues from 1 to J1`):
print(`outputs the individual lists for each of the categorical features. Try:`):
print(`SubLists(MR,3,4);`):

elif nargs=1 and args[1]=AveAndSD then
print(`AveAndSD(L): inputs a list of lists L, outputs [average, standard deviation] of the first entries. Try:`):
print(`AveAndSD(ExtractFields(MR,[7]));`):

elif nargs=1 and args[1]=Percentile then
print(`Percentile(L, F, P): inputs a list of lists L, a feature in the F-th position, a percentile P`):
print(`returns the P-th percentile for the F-th feature. Try:`):
print(`Percentile(MR,7,95);`):

elif nargs=1 and args[1]=Histogram then
print(`Histogram(L, F, W): inputs a list of lists L, a feature in the F-th position, a bin width W`):
print(`prints the histogram for the feature with the bin width being W. Try:`):
print(`Histogram(MR,6,10);`):

elif nargs=1 and args[1]=Covar then
print(`Covar(L, M): inputs a list of lists L, a list M, and produces the covariance matrix with only the features given by M.`):
print(`Try:`):
print(`Covar(MR,[3,4,5]);`):

print(``):



else
 print(`There is no such thing as`, args):

fi:


end:


#SubLists(L,I1,J1): given a non-empty list L of equal-length lists in which the
#I1-th entry of every item is a categorical feature with a value from 1 to J1,
#outputs the list [G1,...,GJ1], where Gc is the list of items of L (in their
#original order) whose I1-th entry equals c. A category with no items gives [].
#Prints `Bad input` and returns FAIL if L is not a non-empty list of
#equal-length lists, if I1 is not a valid position in the items, if J1 is not
#a non-negative integer, or if some I1-th entry lies outside 1..J1. Try:
#SubLists(MR,3,4);
SubLists:=proc(L,I1,J1) local row,c:

# L must be a list of lists that all share one length (an empty L has no
# common length and is rejected).
if not type(L,list(list)) or nops({seq(nops(row),row in L)})<>1 then
 print(`Bad input`):
 RETURN(FAIL):
fi:

# The column index must point inside the items and J1 must be usable as a
# range bound; otherwise the indexing below would raise a raw Maple error.
if not (type(I1,posint) and type(J1,nonnegint) and I1<=nops(L[1])) then
 print(`Bad input`):
 RETURN(FAIL):
fi:

# Every category value that occurs must be one of 1..J1.
if not ({seq(row[I1],row in L)} subset {seq(c,c=1..J1)}) then
 print(`Bad input`):
 RETURN(FAIL):
fi:

# One order-preserving pass per category; the column and the category are
# handed to the predicate as extra arguments of select.
[seq(select((r,col,val)->evalb(r[col]=val),L,I1,c),c=1..J1)]:
end:


#ExtractFields(L,M): inputs a list of lists L and a list M of positions.
#For every item of L it keeps only the entries at the positions listed in M,
#in the order given by M, and returns the list of these shortened items
#after passing it through sort.
#For example, try:
#ExtractFields(MR,[7,1,2]);
ExtractFields:=proc(L,M) local row,col,picked:
picked:=[seq([seq(row[col],col in M)],row in L)]:
sort(picked):
end:

#AveAndSD(L): the average and the standard deviation of the data in L.
#L is either a list of lists (the FIRST entry of each item is the data value,
#e.g. the output of ExtractFields(MR,[i])) or a plain list of numbers.
#Returns [mu,sig] as floats, where sig is the population standard deviation
#(the sum of squared deviations is divided by nops(L)).
#Prints `Bad input` and returns FAIL for an empty list, since the average is
#not defined there.
AveAndSD:=proc(L) local n,x,vals,mu,sig:
n:=nops(L):
if n=0 then
 print(`Bad input`):
 RETURN(FAIL):
fi:
# Normalise both accepted shapes to a flat list of values.
vals:=[seq(`if`(type(x,list),x[1],x),x in L)]:
mu:=evalf(add(x,x in vals)/n):
sig:=evalf(sqrt(add((x-mu)^2,x in vals)/n)):
[mu,sig]:
end:

#Cor(L): inputs a list of pairs [P,S]; outputs [A,B,r] where
#A=[average,s.d.] of the first coordinates, B=[average,s.d.] of the second
#coordinates (both as returned by AveAndSD) and r is the correlation,
#i.e. the average of (P-mu1)*(S-mu2) divided by sig1*sig2.
#The quotient is not defined when either standard deviation is 0.
Cor:=proc(L) local n,i,xs,ys,A,B,mu1,mu2,sig1,sig2:
n:=nops(L):
xs:=[seq(L[i][1],i=1..n)]:
ys:=[seq(L[i][2],i=1..n)]:

# AveAndSD reads the first entry of each item, so every number is wrapped in
# a one-element list before it is handed over.
A:=AveAndSD([seq([xs[i]],i=1..n)]):
B:=AveAndSD([seq([ys[i]],i=1..n)]):
mu1:=A[1]: sig1:=A[2]: mu2:=B[1]: sig2:=B[2]:

[A,B,add((xs[i]-mu1)*(ys[i]-mu2),i=1..n)/(n*sig1*sig2)]:
end:

#Percentile(L, F, P): inputs a list of lists L, a feature position F and a
#percentile P; returns the P-th percentile of the F-th feature, computed by
#the Statistics package on the flattened column produced by ExtractFields.
Percentile:=proc(L, F, P) local column:
column:=ListTools:-Flatten(ExtractFields(L, [F])):
Statistics:-Percentile(column, P):
end:

#Histogram(L, F, W): inputs a list of lists L, a feature position F and a
#bin width W; returns the Statistics-package histogram of the F-th feature,
#with absolute frequencies and bins of width W.
Histogram:=proc(L, F, W) local column:
column:=ListTools:-Flatten(ExtractFields(L, [F])):
Statistics:-Histogram(column, frequencyscale=absolute, binwidth=W):
end:


#Covar(L, M): inputs a list of lists L and a list M of feature positions;
#returns the covariance matrix (as computed by Statistics:-CovarianceMatrix)
#of the features listed in M, in that order.
Covar:=proc(L,M) local row,col,byFeature,observations:

# One list per requested feature, holding that feature's value for every item.
byFeature:=[seq([seq(row[col],row in L)],col in M)]:

# Transpose so that each column of the matrix is one feature.
observations:=LinearAlgebra:-Transpose(Matrix(byFeature)):
Statistics:-CovarianceMatrix(observations):

end:

# The following code extracts the mean and the standard Deviations for each field
# seq(AveAndSD(ExtractFields(MR, [i])), i = 3 .. 9):

# The following code extracts the mean and the standard deviations for each rank
# seq([seq(AveAndSD(ExtractFields(SubLists(MR,3,4)[j], [i])), i = 3 .. 9)], j=1..4):

# The following code extracts the percentile for each field
# seq([seq(Percentile(MR,i,j), j=[5,10,25,50,75,90,95])], i = 3 .. 9):

# The following code outputs the covariance matrix for the 6 fields
# Covar(MR, [3, 4, 5, 6, 7, 9])