#OK to post homework #Victoria Chayes, 3/10/19, Assignment 13 Help:=proc():print(`G3(f,var,pt,sz,MaxI,tol)`): end: #From the textbook and general googling, it seems like picking the step size and tolerance are things that are individual to each input, and while there are general ranges that tend to work out well, it is entirely gut instinct of the implementers -- when the step size is smaller, it'll take way longer to converge, but if the step size is too large a minimum might never converge. One way to address this is to have a variable step size. I wrote a version of the gradient descent method using the Barzilai-Borwein method, which only involves looking at the previous step to update the step size. This worked better on randomized functions. G3:=proc(f,var,pt,sz,MaxI,tol) local g, g1, pt1,i,N1, g2, pt2, s,n: n:=nops(g1): pt1:=pt: g:=Grad(f,var): g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g): g2:=g1: pt2:=pt1: g1:=evalf(sqrt(add(g1[i]^2,i=1..n))): s:=sz: for i from 1 to MaxI while s>tol do pt1:=GD2(f,var,pt1,s): g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g): s:=abs(add((pt1[i]-pt2[i])*(g1[i]-g2[i]),i=1..n )/evalf(sqrt(add((g1[i]-g2[i])^2,i=1..n)))): g2:=g1: pt2:=pt1: od: pt1: end: #For G1 and G2, ############################ #C13.txt: March 7, 2019 Dr. Z.'s Math640(RU) #Implementing the gradient descent algorithm Help13:=proc(): print(`OptF(f,var), Grad(f,var) , GD1(f,var,pt,sz), GD2(f,var,pt,sz) `): print(`G1(f,var,pt,sz,MaxI,tol), G2(f,var,pt,sz,MaxI,tol) `): end: #OptF(f,var): inputs a nice function f (expression) # of a list of variables, and finds the local max and min locations #and values. For example (x^2+y^2+1,[x,y]); should output {[[0,0],1]}. OptF:=proc(f,var) local eq,i,sol,L: eq:={seq(diff(f,var[i])=0,i=1..nops(var))}: sol:=solve(eq,var): if sol=NULL then RETURN(FAIL): fi: L:=[seq(subs(sol[i],var),i=1..nops(sol))]: L: end: Grad:=proc(f,var) local i: option remember: [seq(diff(f,var[i]),i=1..nops(var))]: end: #GD1(f,var,pt,sz): inputs a function (expression) f in the list of variables var, a point pt #a step-size sz, and a small parameter e indicating when to stop #Performs one step, returns the next point NORMALIZED GD1:=proc(f,var,pt,sz) local g,i,Ng: if not ( type(var,list) and type(pt,list) and nops(var)=nops(pt) and type(sz,numeric) and sz>0) then print(`bad input`): RETURN(FAIL): fi: g:=Grad(f,var): g:=subs({seq(var[i]=pt[i],i=1..nops(var))},g): Ng:=evalf(sqrt(add(g[i]^2,i=1..nops(g)))): g:=g/Ng: pt-sz*g: end: #GD2(f,var,pt,sz): inputs a function (expression) f in the list of variables var, a point pt #a step-size sz, and a small parameter e indicating when to stop #Performs one step, returns the next point NOT Normalized GD2:=proc(f,var,pt,sz) local g,i: if not ( type(var,list) and type(pt,list) and nops(var)=nops(pt) and type(sz,numeric) and sz>0) then print(`bad input`): RETURN(FAIL): fi: g:=Grad(f,var): g:=subs({seq(var[i]=pt[i],i=1..nops(var))},g): pt-sz*g: end: #G1(f,var,pt,sz,MaxI,tol): inputs a function f given as an expression, in a list of variables var #a starting point pt, MaxI: number of iterations, and tol a small parameter, we stop when the #norm of the gradient is less than tol, or hit MaxI, iterations. NORMALIZED G1:=proc(f,var,pt,sz,MaxI,tol) local g, g1, pt1,i,N1: pt1:=pt: g:=Grad(f,var): g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g): g1:=evalf(sqrt(add(g1[i]^2,i=1..nops(g1)))): for i from 1 to MaxI while g1>tol do pt1:=GD1(f,var,pt1,sz): g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g): g1:=evalf(sqrt(add(g1[i]^2,i=1..nops(g1)))): od: pt1: end: #G2(f,var,pt,sz,MaxI,tol): inputs a function f given as an expression, in a list of variables var #a starting point pt, MaxI: number of iterations, and tol a small parameter, we stop when the #norm of the gradient is less than tol, or hit MaxI, iterations. NOT NORMALIZED G2:=proc(f,var,pt,sz,MaxI,tol) local g, g1, pt1,i,N1: pt1:=pt: g:=Grad(f,var): g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g): g1:=evalf(sqrt(add(g1[i]^2,i=1..nops(g1)))): for i from 1 to MaxI while g1>tol do pt1:=GD2(f,var,pt1,sz): g1:=subs({seq(var[i]=pt1[i],i=1..nops(var))},g): g1:=evalf(sqrt(add(g1[i]^2,i=1..nops(g1)))): od: pt1: end: