#OK to post homework
#Victoria Chayes, 2/24/19, Assignment 9
Help:=proc():print(`ArDataGen1(A,c,n,N),ArDataMV(A,v,N),PAmv(L,n),TestPMV(L,v),PA1corrected(L) `): end:
#2 Generalize ArData(L1,H1,N,c) to an arbitrary number of dimensions. Write a procedure but with random points from the n-dimensional cube [0,A]^n . More generally, write a procedure ArDataMV(A,v,N) that inputs a pos. number A, and a vector of numbers v (given as a list), (let n:=nops(v)) and outputs a list of length N where each entry is a pair obtained by picking a random point (a list of length n) in [-A,A]^n and returns the pair [x,0] if v.x (the dot product of v and x) is negative and [x,1] if it is zero or positive.
#For the less general version, simply define ra:=rand(0,A) instead.
#ArDataMV(A,v,N): the generalization described in the problem.
ArDataMV:=proc(A,v,N) local ra,L,x,i,j,k,n:
ra:=rand(-A..A):
L:=[]:
n:=nops(v):
for i from 1 to N do
x:=[seq(ra(),j=1..n)]:
if add(x[k]*v[k],k=1..n)<0 then
L:=[op(L),[x,0]]:
else
L:=[op(L),[x ,1]]:
fi:
od:
L:
end:
#3 Implement the Perceptron algorithm in n dimensions (n arbitrary) by writing a procedure PAmv(L,n) that inputs a list containing data of the form [VectorOfLenghn,ZeroOrOne] where VectorOfLengthn is a list of length n, and n is the dimension.
PAmv:=proc(L,n) local l,x,i,k,j:
x:=[0$n]:
l:=nops(L):
for i from 1 to l do
if L[i][2]=1 and add(x[k]*L[i][1][k],k=1..n)<=0 then
x:=[seq(x[j]+L[i][1][j],j=1..n)]:
elif L[i][2]=0 and add(x[k]*L[i][1][k],k=1..n)>0 then
x:=[seq(x[j]-L[i][1][j],j=1..n)]:
fi:
od:
x:
end:
#testing this using the list/ example given in the book gave the exact same answer as the calculation in the chart of the book.
#4 Write an n-dimensional extension to TestP(L,c). Call it TestPMV(L,v) where v is a vector (list) with n components.
TestPMV:=proc(L,v) local fn,fp,i,k,l,n:
fn:=0:
fp:=0:
l:=nops(L):
n:=nops(v):
for i from 1 to l do
if add(v[k]*L[i][1][k],k=1..n)>=0 and L[i][2]=0 then
fp:=fp+1:
elif add(v[k]*L[i][1][k],k=1..n)<0 and L[i][2]=1 then
fn:=fn+1:
fi:
od:
[fn,fp,l]:
end:
#5 Write a procedure PA1corrected(L) that first transforms the list L to the format [[data,1],ZeroOrOne], and uses your procedure PAmv(L,n) with n=2 to outputs two numbers a and b such that L[i][2]=1 iff ax+b>=0. Compare its performance to PA1(L).
PA1corrected:=proc(L) local M,n,i:
n:=nops(L):
M:=[]:
for i from 1 to n do
M:=[op(M),[[L[i][1],1],L[i][2]]]:
od:
PAmv(M,2):
end:
#Testing on five random lists of length 100 with range 120 to 250 and cutoff 170, much like our basketball heights, we had:
#PA1 guessed: 202, 169, 165, 173, 169
#With accuracy: [7, 0, 100], [7, 0, 100], [11,0,100], [6,0,100], [12,0,100]
#PA1corrected guessed: [-88, -8], [98, -6], [144, -7], [31, -8], [174, -6]
#With accuracy: [67, 0, 100], [63, 0, 100], [65, 0, 100], [53,0,100], [71,0,100]
#When the data was increased to 200 datapoints,
#PA1 guessed: 166, 172, 169, 169, 7
#With accuracy: [16, 0, 200], [13, 0, 200], [21, 0, 200], [14, 0, 200], [13, 0, 200]
#PA1corrected guessed: [46, -17], [-115, -16], [-147, -16], [78, -15], [215, -15]
#With accuracy: [129, 0, 200], [122, 0, 200], [134, 0, 200], [117, 0, 200], [125, 0, 200]
#so for this spread of data, PA1 seems to be a far preferable algorithm.
######################################
#From C9.txt
Help9:=proc() print(`PA1(L) , ArData(L1,H1,N,c), TestP(L,c) , NoisyData(L1,H1,N,c,p) `): end:
#1D perceptron algorithm,
#PA1(L): inputs a list of pairs [heightIncm,{0,1}] where 0 if she or he did not make it 1 if he or she did
#outputs a numnber c a "predictor" that if x>c then they get to be admitted
PA1:=proc(L) local i,c,n:
c:=0:
n:=nops(L):
for i from 1 to n do
if L[i][2]=1 and L[i][1]c then
#False negative then
c:=c+L[i][1]:
fi:
od:
c:
end:
#inputs L1 and H1 (low and high heights in basketball players and the number N of applicants
#and a cut-off c that >=c gets 1 (admitted) and 0 (rejected)
ArData:=proc(L1,H1,N,c) local L,ra,i,t:
ra:=rand(L1..H1):
L:=[]:
for i from 1 to N do
t:=ra():
if t>=c then
L:=[op(L),[t,1]]:
else
L:=[op(L),[t,0]]:
fi:
od:
L:
end:
#Inputs a data set of pairs [height,1 or 0] and a cut-off c, outputs the
#triple # of false negatives and # of false positive, total number of data entries
TestP:=proc(L,c) local fn,fp,i:
fn:=0:
fp:=0:
for i from 1 to nops(L) do
if L[i][1]>=c and L[i][2]=0 then
fp:=fp+1:
elif L[i][1]=c gets 1 (admitted) and 0 (rejected) and a prob. p of mis-classifying
#outputs the noisy data set
NoisyData:=proc(L1,H1,N,c,p) local L,ra,i,t,ra1:
ra:=rand(L1..H1):
ra1:=rand(0.0..1.0):
L:=[]:
for i from 1 to N do
t:=ra():
if t>=c then
if ra1()<=p then
L:=[op(L),[t,0]]:
else
L:=[op(L),[t,1]]:
fi:
else
if ra1()<=p then
L:=[op(L),[t,1]]:
else
L:=[op(L),[t,0]]:
fi:
fi:
od:
L:
end: