######################################################################
## EM19Proj1.txt Save this file as   EM19Proj1.txt to use it,        #
# stay in the                                                        #
## same directory, get into Maple (by typing: maple <Enter> )        #
## and then type:  read   EM19Proj1.txt: <Enter>                     #
## Then follow the instructions given there                          #
##                                                                   #
## Written by students of Dr. Z.'s Math 640 , Spring 2019, class     #
##  Coordinated by Yukun Yao, yao@math.rutgers.edu                   #
#####################################################################

# Load the CurveFitting package at top level.
with(CurveFitting):

# Read DATA.txt from the current directory.
# NOTE(review): presumably this defines the database MR used in the Help examples -- confirm against DATA.txt.
read `DATA.txt`:

# Banner printed every time the package is read.
print(`This is EM19Proj1.txt, a final project for Dr. Z.'s Math 640, Spring 2019, class, Math 640, written by`):
print(`Yukun Yao (coordinator), Victoria Chayes, .....,   `):

print(`To analyze the relationship between Factulty accoplishment and rewards (rank and salary), and among them`):

# Help(): with no arguments prints an overview of the package and the list of
#         documented procedures.
# Help(ProcName): prints the description and a usage example for ProcName, where
#         ProcName is one of Cor, ExtractFields, SubLists, AveAndSD, LS_reg,
#         LS_reg_refine, Display_LS.
# Any other argument sequence prints a "no such thing" message.
# The procedure only prints; it returns nothing useful.
Help:=proc()
# nargs=0 is the explicit test for "called with no arguments".
if nargs=0 then

print(` EM19Proj1.txt: A Maple package for  Analyzing Data in Maple given in terms of an ABT `):
print(`with the format [FirstName,LastName,Featues , ... ]`):
print(`The MAIN procedures are: Cor, ExtractFields, SubLists, LS_reg, LS_reg_refine, Display_LS `):
print(`The supporting procedures are: AveAndSD `):
print(`   `):


elif nargs=1 and args[1]=Cor then
print(`Cor(L): inputs a list of pairs [P,S] outputs the corrletion. Try`):
print(`Cor(ExtractFields(MR,[4,7])); `):

elif nargs=1 and args[1]=ExtractFields then
print(`ExtractFields(L,M): inputs a list of lists L and a list M , produces a list of sublists with only the features given by M in that order`):
print(`For example, try:`):
print(`ExtractFields(MR,[7,1,2]);`):

elif nargs=1 and args[1]=SubLists then
print(`SubLists(L,I1,J1): given a list L of lists where in each item, the I1-th list describes a categorical featues from 1 to J1`):
print(`outputs the individual lists for each of the categorical features. Try:`):
print(`SubLists(MR,3,4);`):

elif nargs=1 and args[1]=AveAndSD then
print(`AveAndSD(L): inputs a list of numbers L, outputs the pair [average, standard deviation]. Try:`):
print(`AveAndSD([1,2,3,4]);`):

#-----------------------------------------------------------Revision from Tong Cheng-------------------------------
elif nargs=1 and args[1]=LS_reg then
print(`LS_reg(x,y): inputs 2 lists of data  outputs the pair of coeffcients which is the least square regression of y=a*x+b`):
print(`For example, try:`):
# The seq for the y-data needs its own range, otherwise the example is not valid Maple.
print(`LS_reg([seq(i,i=1..100)],[seq(0.01*i^2+2*i+1,i=1..100)]);`):

elif nargs=1 and args[1]=LS_reg_refine then
print(`LS_reg_refine(x,y): a list L of integers [*,*,*] and a posi. integer representing corresponding fields, former be independent variable while latter be dependent var.  outputs the list of coeffcients `):
print(`For example, try:`):
print(`LS_reg_refine([4],7,MR)`):

elif nargs=1 and args[1]=Display_LS then
print(`inputs: L - lists of coeffcient [a[1], ... a[k], b ] in y = a[1]*n[1] + ... + a[k]*n[k] + b`):
print(`x_index - list of number of field in ADT for independent var`):
print(`y_index - number of field in ADT for dependent var`):
print(`MR - database`):
print(`outputs: a 2-D or 3-D graph depicting the result of simple least square regression`):
print(`For example in 2D, try:`):
print(`Display_LS(P2_coeff,[5],7,MR)`):
print(`For example in 3D, try:`):
print(`Display_LS(P3_coeff,[4,5],7,MR)`):

#------------------------------------------------------------------------------------------------------------------



print(``):

else
 print(`There is no such thing as`, args):

fi:


end:


#SubLists(L,I1,J1): splits a list of records L into J1 groups according to the
#categorical label (an integer from 1 to J1) stored in position I1 of every record.
#Input : L  - a non-empty list of lists, all of the same length
#        I1 - the position of the categorical field inside each record
#        J1 - the number of categories
#Output: the list [G1,...,GJ1] where Gj holds, in their original order, the records
#        whose I1-th entry equals j. Prints `Bad input` and returns FAIL if L is not
#        a list of equal-length lists, or if some label is not in {1,...,J1}.
#Try: SubLists(MR,3,4);
SubLists:=proc(L,I1,J1) local rec,lab:

# Guard 1: L must itself be a list.
if not type(L,list) then
 print(`Bad input`):
 RETURN(FAIL):
fi:

# Guard 2: every entry of L must be a list (an empty L is rejected here as well).
if {seq(type(rec,list),rec in L)}<>{true} then
 print(`Bad input`):
 RETURN(FAIL):
fi:

# Guard 3: all records must have the same number of fields.
if nops({seq(nops(rec),rec in L)})<>1 then
 print(`Bad input`):
 RETURN(FAIL):
fi:

# Guard 4: every label that occurs must be one of 1,...,J1.
if not ({seq(rec[I1],rec in L)} subset {seq(lab,lab=1..J1)}) then
 print(`Bad input`):
 RETURN(FAIL):
fi:

# One pass of select per category; select keeps the original order of the records.
[seq(select((r,c)->r[I1]=c,L,lab),lab=1..J1)]:
end:


#ExtractFields(L,M): projects every record of L onto the fields listed in M.
#Input : L - a list of lists (records)
#        M - a list of field positions
#Output: the list of sublists [rec[M[1]], rec[M[2]], ...], one per record of L, passed
#        through Maple's sort. Note that, because of the sort, the output rows are in
#        general NOT in the same order as the records of L.
#For example, try:
#ExtractFields(MR,[7,1,2]);
ExtractFields:=proc(L,M) local projected:

# Build the projected record for each entry of L, keeping the field order given by M.
projected:=map(proc(rec) local c: [seq(rec[M[c]],c=1..nops(M))]: end proc,L):

sort(projected):

end:

#AveAndSD(L): the average and standard deviation of a list of numbers.
#Input : L - a list of numbers
#Output: [mean, sd], where mean is the floating-point average of L and sd is the
#        square root of the average squared deviation from the mean (divisor nops(L)).
AveAndSD:=proc(L) local v,cnt,center,spread:
cnt:=nops(L):
# The sum is formed exactly and converted to a float only after the division.
center:=evalf(add(v,v in L)/cnt):
spread:=sqrt(add((v-center)^2,v in L)/cnt):
[center,spread]:
end:

#Cor(L): correlation of paired data.
#Input : L - a list of pairs [P,S]
#Output: the list [A,B,r], where A=AveAndSD of the first coordinates, B=AveAndSD of the
#        second coordinates, and r is the correlation coefficient of the two coordinates.
Cor:=proc(L) local firsts,seconds,k,statsP,statsS,crossSum:
# Split the pairs into their two coordinate lists.
firsts:=map(p->p[1],L):
seconds:=map(p->p[2],L):

# Each of these is [mean, sd].
statsP:=AveAndSD(firsts):
statsS:=AveAndSD(seconds):

# Sum of the products of the deviations from the respective means.
crossSum:=add((firsts[k]-statsP[1])*(seconds[k]-statsS[1]),k=1..nops(L)):

[statsP,statsS,crossSum/(nops(L)*statsP[2]*statsS[2])]:
end:

#-------------------------------------------------------------------------------

(*

elif nargs=1 and args[1]=LS_reg then
print(`LS_reg(x,y): inputs 2 lists of data  outputs the pair of coeffcients which is the least square regression of y=a*x+b`):
print(`For example, try:`):
print(`LS_reg([seq(i,i=1..100)],[seq(0.01*i^2+2*i+1)]);`):

elif nargs=1 and args[1]=LS_reg_refine then
print(`LS_reg(x,y): a list L of integers [*,*,*] and a posi. integer representing corresponding fields, former be independent variable while latter be dependent var.  outputs the list of coeffcients `):
print(`For example, try:`):
print(`LS_reg_refine([4],7,MR)`):

elif nargs=1 and args[1]=Display_LS then
print(`inputs: L - lists of coeffcient [a[1], ... a[k], b ] in y = a[1]*n[1] + ... + a[k]*n[k] + b`):
print(`x_index - list of number of field in ADT for independent var`):
print(`y_index - number of field in ADT for dependent var`):
print(`MR - database`):
print(`outputs: a 2-D or 3-D graph depicting the result of simple least square regression`):
print(`For example in 2D, try:`):
print(`Display_LS(P2_coeff,[5],7,MR)`):
print(`For example in 3D, try:`):
print(`Display_LS(P3_coeff,[4,5],7,MR)`):


*)


# LS_reg(x,y): simple least-squares line through the points (x[i],y[i]).
# inputs : x - list of values of the independent variable
#          y - list of values of the dependent variable (same length as x)
# outputs: the list [a,b] of fitted coefficients of the model y = a*n + b.
# No plot is produced; use Display_LS to draw the fitted line.
LS_reg:=proc(x,y)  local  a,b,n,f:

# n is declared local so that a value assigned to a global n cannot corrupt the model.
# Statistics:-Fit is called by its long-form name because 'with' is a top-level
# command and should not be relied on inside a procedure body.
f:=Statistics:-Fit(a*n+b,x,y,n):

# f is the fitted polynomial in n: slope is the n^1 coefficient, intercept the n^0 one.
[coeff(f,n,1),coeff(f,n,0)]:

end:


# LS_reg_refine(L,y_index,MR): multiple linear least-squares regression on fields of MR.
# inputs : L       - non-empty list of field positions used as independent variables
#          y_index - field position of the dependent variable
#          MR      - database: a list of records (lists)
# outputs: the list [a[1],...,a[k],b] of fitted coefficients of the model
#          y = a[1]*n[1] + ... + a[k]*n[k] + b, where k=nops(L).
LS_reg_refine:=proc(L,y_index,MR)  local dim, F, rows, X, y, REG, i, a, b, n :

dim:=nops(L):
if dim=0 then
 error "LS_reg_refine needs at least one independent field":
fi:

F:=add(a[i]*n[i],i=1..dim)+b:

# ExtractFields sorts its output, so the predictors and the response MUST be extracted
# in a single call: extracting them separately reorders them independently and destroys
# the pairing between each observation's predictors and its response.
rows:=ExtractFields(MR,[op(L),y_index]):

# Fit expects the predictors as a Matrix (one row per observation) and the response
# as a Vector; the last entry of every extracted row is the response.
X:=Matrix([seq(rows[i][1..dim],i=1..nops(rows))]):
y:=Vector([seq(rows[i][dim+1],i=1..nops(rows))]):

# Long-form name: 'with' is a top-level command and is not reliable inside a procedure.
REG:=Statistics:-Fit(F,X,y,[seq(n[i],i=1..dim)]):

# Slopes are the coefficients of each n[i]; the intercept is REG with all n[i] set to 0.
[seq(coeff(REG,n[i],1),i=1..dim),subs({seq(n[i]=0,i=1..dim)},REG)]:

end:


# Display_LS(L,x_index,y_index,MR): draws the data together with a fitted linear model.
# inputs : L       - list of coefficients [a[1], ... a[k], b ] of y = a[1]*n[1] + ... + a[k]*n[k] + b
#          x_index - list of the k field positions of the independent variables (k = 1 or 2)
#          y_index - field position of the dependent variable
#          MR      - database: a list of records (lists)
# outputs: a 2-D plot (k=1: fitted line plus data points) or a 3-D plot (k=2: fitted
#          plane plus data points). The plotted domain extends 5 units beyond the data
#          range in each independent variable.
# errors : raised if k is not 1 or 2, if L does not have k+1 entries, or if MR has no records.
Display_LS:=proc(L,x_index,y_index,MR)   local F,n,i,k,lower_domain,upper_domain,A,B,Data_matrix :

k:=nops(x_index):

# Only a line (k=1) or a plane (k=2) can be drawn.
if not (k=1 or k=2) then
 error "Display_LS can only plot 1 or 2 independent variables, but received %1", k:
fi:

# L must hold one slope per independent variable plus the intercept.
if nops(L)<>k+1 then
 error "expected %1 coefficients in L, but received %2", k+1, nops(L):
fi:

# generate the linear func from least square coeffcients
F:=add(L[i]*n[i],i=1..k)+L[-1]:

# rows of the form [x[1],...,x[k],y]
Data_matrix:=ExtractFields(MR,[op(x_index),y_index]):

if nops(Data_matrix)=0 then
 error "the database contains no records to plot":
fi:

# Package commands are called by long-form names because 'with' is a top-level
# command and is not reliable inside a procedure body.
if k = 1 then
	# in 2-D case: plot accepts a list of [x,y] points directly
	lower_domain:= min([seq(Data_matrix[i][1],i=1..nops(Data_matrix))])-5:
	upper_domain:= max([seq(Data_matrix[i][1],i=1..nops(Data_matrix))])+5:
	A:=plot(F,n[1]=lower_domain..upper_domain):
	B:=plot(Data_matrix,style=point):
	return(plots:-display({A,B}, title=cat("a[1]=",L[1],", ","b=",L[-1]))):
else
	# in 3-D case: ScatterPlot3D takes the points as a 3-column Matrix
	lower_domain:=[min([seq(Data_matrix[i][1],i=1..nops(Data_matrix))])-5, min([seq(Data_matrix[i][2],i=1..nops(Data_matrix))])-5]:
	upper_domain:=[max([seq(Data_matrix[i][1],i=1..nops(Data_matrix))])+5, max([seq(Data_matrix[i][2],i=1..nops(Data_matrix))])+5]:
	A:=plot3d(F,n[1]=lower_domain[1]..upper_domain[1], n[2]=lower_domain[2]..upper_domain[2]):
	B:=Statistics:-ScatterPlot3D(Matrix(Data_matrix)):
	return(plots:-display({A,B},title=cat("a[1]=",L[1],", ", "a[2]=", L[2],", ","b=",L[-1]))):
fi:

end: