#OK to post homework
#Victoria Chayes, 3/10/19, Assignment 13

Help:=proc():print(`G3(f,var,pt,sz,MaxI,tol)`): end:

#From the textbook and general googling, it seems like picking the step size and tolerance are things that are individual to each input, and while there are general ranges that tend to work out well, it is entirely gut instinct of the implementers -- when  the step size is smaller, it'll take way longer to converge, but if the step size is too large a minimum might never converge. One way to address this is to have a variable step size. I wrote a version of the gradient descent method using the Barzilai-Borwein method, which only involves looking at the previous step to update the step size. This worked better on randomized functions.

G3:=proc(f,var,pt,sz,MaxI,tol) local g, g1, pt1,i,N1, g2, pt2, s,n:
n:=nops(g1):
pt1:=pt:
g:=Grad(f,var):
g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g):
g2:=g1:
pt2:=pt1:
g1:=evalf(sqrt(add(g1[i]^2,i=1..n))):
s:=sz:

for i from 1 to MaxI while s>tol do
  pt1:=GD2(f,var,pt1,s):
  g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g):
  s:=abs(add((pt1[i]-pt2[i])*(g1[i]-g2[i]),i=1..n )/evalf(sqrt(add((g1[i]-g2[i])^2,i=1..n)))):
  g2:=g1:
  pt2:=pt1:
od:

pt1:    
end:

#For G1 and G2, 


############################
#C13.txt: March 7, 2019 Dr. Z.'s Math640(RU)
#Implementing the gradient descent algorithm
Help13:=proc(): print(`OptF(f,var), Grad(f,var) , GD1(f,var,pt,sz), GD2(f,var,pt,sz) `): 
print(`G1(f,var,pt,sz,MaxI,tol), G2(f,var,pt,sz,MaxI,tol) `):
end:

#OptF(f,var): inputs a nice function f (expression)
# of a list of variables, and finds the local max and min locations
#and values. For example (x^2+y^2+1,[x,y]); should output {[[0,0],1]}.
OptF:=proc(f,var) local eq,i,sol,L:
eq:={seq(diff(f,var[i])=0,i=1..nops(var))}:
sol:=solve(eq,var):

if sol=NULL then
  RETURN(FAIL):
fi:

L:=[seq(subs(sol[i],var),i=1..nops(sol))]: 
L:
end:

Grad:=proc(f,var) local i: option remember:
 [seq(diff(f,var[i]),i=1..nops(var))]: end:

#GD1(f,var,pt,sz): inputs a function (expression) f in the list of variables var, a point pt
#a step-size sz, and a small parameter e indicating when to stop
#Performs one step, returns the next point NORMALIZED
GD1:=proc(f,var,pt,sz) local g,i,Ng:
if not ( type(var,list) and type(pt,list) and nops(var)=nops(pt) and type(sz,numeric) and sz>0) then
  print(`bad input`):
  RETURN(FAIL):
fi:
  
 g:=Grad(f,var):

 g:=subs({seq(var[i]=pt[i],i=1..nops(var))},g):
 Ng:=evalf(sqrt(add(g[i]^2,i=1..nops(g)))):
 g:=g/Ng:

 pt-sz*g:

 end:

#GD2(f,var,pt,sz): inputs a function (expression) f in the list of variables var, a point pt
#a step-size sz, and a small parameter e indicating when to stop
#Performs one step, returns the next point NOT Normalized
GD2:=proc(f,var,pt,sz) local g,i:
if not ( type(var,list) and type(pt,list) and nops(var)=nops(pt) and type(sz,numeric) and sz>0) then
  print(`bad input`):
  RETURN(FAIL):
fi:
  
 g:=Grad(f,var):

 g:=subs({seq(var[i]=pt[i],i=1..nops(var))},g):
 

 pt-sz*g:

 end:

    #G1(f,var,pt,sz,MaxI,tol): inputs a function f given as an expression, in a list of variables var
    #a starting point pt, MaxI: number of iterations, and tol a small parameter, we stop when the
    #norm of the gradient is less than tol, or hit MaxI, iterations. NORMALIZED
   G1:=proc(f,var,pt,sz,MaxI,tol) local g, g1, pt1,i,N1:
   pt1:=pt:
     g:=Grad(f,var):
       g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g):
       g1:=evalf(sqrt(add(g1[i]^2,i=1..nops(g1)))):
     for i from 1 to MaxI while g1>tol do
       pt1:=GD1(f,var,pt1,sz):
       
       g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g):
       g1:=evalf(sqrt(add(g1[i]^2,i=1..nops(g1)))):
   od:
   pt1:    
          end:
 #G2(f,var,pt,sz,MaxI,tol): inputs a function f given as an expression, in a list of variables var
    #a starting point pt, MaxI: number of iterations, and tol a small parameter, we stop when the
    #norm of the gradient is less than tol, or hit MaxI, iterations. NOT NORMALIZED
   G2:=proc(f,var,pt,sz,MaxI,tol) local g, g1, pt1,i,N1:
   pt1:=pt:
     g:=Grad(f,var):
       g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g):
       g1:=evalf(sqrt(add(g1[i]^2,i=1..nops(g1)))):
     for i from 1 to MaxI while g1>tol do
       pt1:=GD2(f,var,pt1,sz):
       
       g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g):
       g1:=evalf(sqrt(add(g1[i]^2,i=1..nops(g1)))):
   od:
   pt1:    
          end: