#C19.txt, 4/4/19, Finishing Decision Trees
Help:=proc(): print(` TBU(P,DS) , Majority(P,DS), MakeDS1(P,DS,AF,DSprev), MakeDS(P,DS) `): end:

#old stuff

#C18.txt, April Fool's Day, 2019
#Decision Trees
Help18:=proc(): print(` H(P,DS) , Dodam(N,k), IG(P,DS,f) `): print(`MakeDS(P,DS) `): end:

#start C16.txt
#C16.txt, Dr. Z. ExpMath, March 25, 2019
Help16:=proc(): print(` GuessWhoS(x,N), ManyGWS(N,K,t), GuessWhoC(x,N) , SEnt(P) `): print(`Profile(n,k), ABTP(N,k) `): end:

#GuessWhoS(x,N): simulating a stupid Guess Who game with a number from 0 to N;
#returns the number of questions asked
GuessWhoS:=proc(x,N) local co,i:
co:=0:
for i from 0 to N do
 #print(`Is the number equal to`, i):
 co:=co+1:
 if x=i then
  RETURN(co):
 fi:
od:
FAIL:
end:

#ManyGWS(N,K,t): the prob. generating function, in the variable t,
#for K Guess Who games, followed by the average and s.d.
ManyGWS:=proc(N,K,t) local ra,i,f,mu,sig,f1:
ra:=rand(0..N):
f:=add(t^GuessWhoS(ra(),N),i=1..K)/K:
mu:=subs(t=1,diff(f,t)):
f1:=f/t^mu:
sig:= sqrt(subs(t=1,diff(t*diff(f1,t),t))):
[evalf([mu,sig]),sort(evalf(f))]:
end:

#GuessWhoC(x,N): simulating a clever (bisection) Guess Who game with a number from 0 to N;
#returns the number of questions asked
GuessWhoC:=proc(x,N) local co,L,U,U0:
L:=0:
U:=trunc((N-1)/2):
co:=0:
while L<U do
 co:=co+1:
 if x>=L and x<=U then
  U:=trunc((U+L)/2):
 else
  U0:=U:
  U:=trunc(U+(U-L)/2):
  L:=U0+1:
 fi:
od:
co:
end:

#SEnt(P): the (Shannon) amount of information, in bits, of the discrete prob. distribution P
SEnt:=proc(P) local i,P1:
P1:=remove(x->x=0,P):
P1:=P1/convert(P1,`+`):
if not (type(P1,list) and min(op(P1))>0 and convert(P1,`+`)=1) then
 print(`bad input`):
 RETURN(FAIL):
fi:
-add(P1[i]*log[2](P1[i]),i=1..nops(P1)):
end:

#[[input,output]].
#F(input)=output, Minimize Sum((F(input)-output)^2, data)
#the inputs are n-dim vectors; fit them with a polynomial in (x1, ..., xn)
#of degree d
#[[Name,[DescriptiveFeature1, ..., DescriptiveFeaturek],targetFeature] , ...]

#Profile(n,k): inputs a positive integer n and a positive integer k; outputs
#a list of 0s and 1s of length k whose i-th item is "Is n divisible by the i-th prime?", for i from 1 to k
#For example, Profile(10,3) should output [1,0,1]
Profile:=proc(n,k) local i:
subs({true=1, false=0},[ seq(evalb(n mod ithprime(i)=0),i=1..k)]):
end:
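#Illustration, not part of the original class code: the name PrimeEnt is made up for this
#sketch. It uses SEnt to compute the Shannon entropy (in bits) of the prime/not-prime split
#of the integers 1..N, the same quantity that H computes below for a general data set.
PrimeEnt:=proc(N) local i,pr:
pr:=nops(select(isprime,[seq(i,i=1..N)]))/N:
evalf(SEnt([pr,1-pr])):
end:
#For example, PrimeEnt(20) is evalf(SEnt([8/20,12/20])), about 0.971 bits.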
#ABTP(N,k): inputs a positive integer N and a positive integer k; outputs an ABT
#for the data generated from them, using the k descriptive features
#"is it divisible by the i-th prime?", for i from 1 to k
ABTP:=proc(N,k) local n:
[ seq([n,Profile(n,k), isprime(n)],n=1..N)]:
end:
#end C16.txt

#From C17.txt
#C17.txt, Decision trees, March 28, 2019
Help17:=proc(): print(`RandDT(P,d), RandDT1(P,d,AF) , Predict(P,T,v)`): print(`Breakup(P,S,f)`): end:

#If there are k descriptive features, we name them 1, ..., k,
#descriptive feature i has a[i] possible values (named 1, ..., a[i]),
#and the target feature has N possible values:
#[[a[1], ..., a[k]],N]
#For example, if the features are "divisible by 2", "by 3", "by 5", and the target is "is prime":
#[[2,2,2],2]

#RandDT1(P,d,AF): inputs P=[L,N], the type of the data, and an integer d; outputs
#a random decision tree of depth <=d, with vertex-labels from AF
RandDT1:=proc(P,d,AF) local L,N,T1,T,RootLabel,AF1,k,i:
L:=P[1]:
N:=P[2]:
if d=0 then
 RETURN([rand(1..N)(),[]]):
fi:
RootLabel:=AF[rand(1..nops(AF))()]:
AF1:=AF minus {RootLabel}:
#k:=number of subtrees of descriptive feature RootLabel
k:=L[RootLabel]:
T:=[]:
for i from 1 to k do
 T1:=RandDT1(P,d-1,AF1):
 T:=[op(T),T1]:
od:
[RootLabel,T]:
end:

#RandDT(P,d): inputs P=[L,N], the type of the data, and an integer d; outputs
#a random decision tree of depth <=d
RandDT:=proc(P,d) local i:
if d>nops(P[1]) then
 print(`The depth can be at most`, nops(P[1])):
 RETURN(FAIL):
fi:
RandDT1(P,d,{seq(i,i=1..nops(P[1]))}):
end:

#Predict(P,T,v): inputs a profile P,
#a decision tree T with that profile, and a data vector v;
#outputs the prediction made by the tree regarding the target value of v
Predict:=proc(P,T,v) local N,L,i,RootLabel,T1:
L:=P[1]:
N:=P[2]:
#1<=v[i]<=L[i]
#nops(v)=nops(L)
if nops(v)<>nops(L) then
 RETURN(FAIL):
fi:
for i from 1 to nops(v) do
 if not (1<=v[i] and v[i]<=L[i]) then
  RETURN(FAIL):
 fi:
od:
if T[2]=[] then
 if not (T[1]>=1 and T[1]<=N) then
  RETURN(FAIL):
 else
  RETURN(T[1]):
 fi:
fi:
RootLabel:=T[1]:
T1:=T[2][v[RootLabel]]:
Predict(P,T1,v):
end:

#[v,Target]
#Breakup(P,S,f): inputs a profile P of a data set, a data set S whose data vectors have length
#nops(P[1]), and a descriptive feature f (between 1 and nops(P[1])); outputs
#the list of subsets according to the value of v[f] (there are P[1][f] of them)
#Modified after class
Breakup:=proc(P,S,f) local L,N,i,SS,v:
L:=P[1]:
N:=P[2]:
if not (1<=f and f<=nops(P[1])) then
 RETURN(FAIL):
fi:
for i from 1 to L[f] do
 SS[i]:=[]:
od:
for i from 1 to nops(S) do
 v:=S[i][2]:
 SS[v[f]]:=[op(SS[v[f]]), S[i]]:
od:
[seq(SS[i],i=1..L[f])]:
end:
#end from C17.txt

#Dodam(N,k): the data set ABTP(N,k) with the 0/1 profiles and false/true targets recoded as 1/2
Dodam:=proc(N,k) local G,i:
G:=subs({false=1,true=2},ABTP(N,k)):
[seq([G[i][1], G[i][2]+[1$k], G[i][3]],i=1..nops(G))]:
end:

#H(P,DS): the entropy according to the target feature of the data set DS,
#whose data points have the form [Name,v,t] (1<=t<=N), N=P[2]
H:=proc(P,DS) local co,i,t,N,s:
N:=P[2]:
co:=[0$N]:
for i from 1 to nops(DS) do
 t:=DS[i][3]:
 co:=co+[0$(t-1),1,0$(N-t)]:
od:
s:=convert(co,`+`):
co:=co/s:
evalf(SEnt(co)):
end:

#IG(P,DS,f): the information gain in picking f as the root of the decision tree
#Try: IG([[2,2,2],2],Dodam(20,3),1);
IG:=proc(P,DS,f) local k,DSlist,N,i:
N:=P[2]:
k:=nops(P[1]):
if not (1<=f and f<=k) then
 RETURN(FAIL):
fi:
DSlist:=Breakup(P,DS,f):
H(P,DS)- add( nops(DSlist[i])/nops(DS)*H(P,DSlist[i]),i=1..P[1][f]):
end:

##end old stuff

#TBU(P,DS): inputs P=[L,N] and a data set DS; outputs a list of length
#N whose i-th entry is the number of data points whose target value is i
TBU:=proc(P,DS) local i,co,N:
N:=P[2]:
for i from 1 to N do
 co[i]:=0:
od:
for i from 1 to nops(DS) do
 co[DS[i][3]]:=co[DS[i][3]]+1:
od:
[seq(co[i],i=1..N)]:
end:
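#Illustration, not part of the original class code: the name BestRoot is made up for this
#sketch. It ranks the descriptive features by information gain, the same criterion that
#MakeDS1 below uses to choose the root, returning the index of the best feature together
#with the list of all the gains.
BestRoot:=proc(P,DS) local f,gains:
gains:=[seq(evalf(IG(P,DS,f)),f=1..nops(P[1]))]:
[max[index](gains),gains]:
end:
#For example, BestRoot([[2,2,2],2],Dodam(20,3)) should single out feature 1 ("divisible by 2")
#as the most informative first question about the primality of 1..20.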
#Majority(P,DS): inputs P, a profile of our data, [L,N], where
#L is a list of length k (where k is the number of descriptive features)
#and L[i] is the number of different values {1,2, ..., L[i]} that
#descriptive feature i can assume, and
#a data set DS=[ [Name, DescriptiveVector, TargetValue], ...];
#outputs the most common target value in DS
Majority:=proc(P,DS):
max[index](TBU(P,DS)):
end:

#MakeDS1(P,DS,AF,DSprev): implements the ID3 algorithm
#inputs P=[L,N], a data set
#DS=[ ..., [Name, v,t], ...], the set AF of still-available descriptive features,
#and the parent data set DSprev (used when DS is empty)
MakeDS1:=proc(P,DS,AF,DSprev) local i,TarS,champ,rec,hope,SDS:
if DS=[] then
 RETURN([Majority(P,DSprev),[]]):
fi:
TarS:={seq(DS[i][3],i=1..nops(DS))}:
if nops(TarS)=1 then
 RETURN([TarS[1],[]]):
fi:
if AF={} then
 RETURN([Majority(P,DS),[]]):
fi:
champ:=AF[1]:
rec:=IG(P,DS,champ):
for i from 2 to nops(AF) do
 hope:=IG(P,DS,AF[i]):
 if hope>rec then
  champ:=AF[i]:
  rec:=hope:
 fi:
od:
SDS:=Breakup(P,DS,champ):
[champ,[seq(MakeDS1(P,SDS[i],AF minus {champ},DS),i=1..nops(SDS))]]:
end:

#MakeDS(P,DS): implements the ID3 algorithm (added after class)
MakeDS:=proc(P,DS) local i:
MakeDS1(P,DS,{seq(i,i=1..nops(P[1]))},DS):
end:
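#Illustration, not part of the original class code: the name CheckDS is made up for this
#sketch. It builds the ID3 decision tree for the Dodam(N,k) data set with MakeDS and then
#uses Predict to count how many of the N data points the tree classifies correctly,
#returning [number correct, N].
CheckDS:=proc(N,k) local P,DS,T,co,i:
P:=[[2$k],2]:
DS:=Dodam(N,k):
T:=MakeDS(P,DS):
co:=0:
for i from 1 to nops(DS) do
 if Predict(P,T,DS[i][2])=DS[i][3] then
  co:=co+1:
 fi:
od:
[co,nops(DS)]:
end:
#For example, CheckDS(20,3) reports how many of the integers 1..20 are correctly classified
#as prime or not prime using only the features "divisible by 2, 3, 5".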