#C18.txt, April Fool's Day, 2019 #Decison Trees Help:=proc(): print(` H(P,DS) , Dodam(N,k), IG(P,DS,f) `): print(`MakeDS(P,DS) `): end: #start C16.txt #C16.txt, Dr. Z. ExpMath, March 25, 2019 Help16:=proc(): print(` GuessWhoS(x,N), ManyGWS(N,K,t), GuessWhoC(x,N) , SEnt(P) `): print(`Profile(n,k), ABTP(N,k) `): end: #Simulating a stupid guess who game with a nunmber from 0 to N GuessWhoS:=proc(x,N) local co,i: co:=0: for i from 0 to N do #print(`Is the number equal to`, i): co:=co+1: if x=i then RETURN(co): fi: od: FAIL: end: #ManyGHS(N,K,t): the prob. generating function using variable t #fof K Guess who games, followed by the average and s.d. ManyGWS:=proc(N,K,t) local ra,i,f,mu,sig,f1: ra:=rand(0..N): f:=add(t^GuessWhoS(ra(),N),i=1..K)/K: mu:=subs(t=1,diff(f,t)): f1:=f/t^mu: sig:= sqrt(subs(t=1,diff(t*diff(f1,t),t))): [evalf([mu,sig]),sort(evalf(f))]: end: #Simulating a clever guess who game with a nunmber from 0 to N GuessWhoC:=proc(x,N) local co,L,U,U0: L:=0: U:=trunc((N-1)/2): co:=0: while L=L and x<=U then U:=trunc((U+L)/2): else U0:=U: U:=trunc(U+(U-L)/2): L:=U0+1: fi: od: co: end: #SEent(P): The (Shannon) amount of information in bits of the discere prob. distribution P SEnt:=proc(P) local i: if not (type(P,list) and min(op(P))>0 and convert(P,`+`)=1) then print(`bad input`): RETURN(FAIL): fi: -add(P[i]*log[2](P[i]),i=1..nops(P)): end: #[[Name,[DescriptiveFeature1, ..., DescriptiveFeature],targetFeature] , ...] #Profile(n,k): inputs a positive integer n and k outputs #a list of 0,1 of length k whose i-th item is "Is n divisible by prime[i]" for i from 1 to k #For example #Profile(10,3) should output [1,0,1] Profile:=proc(n,k) local i: subs({true=1, false=0},[ seq(evalb(n mod ithprime(i)=0),i=1..k)]): end: #Inputs a positive integer N and pos. integer k outputs an ABT #for the data generated from them using the k descriptive features #is it divisible by the k-th prime ABTP:=proc(N,k) local n: [ seq([n,Profile(n,k), isprime(n)],n=1..N)]: end: #end C16.txt #From C17.txt #C17.txt, Decision trees, March 28, 2019 Help17:=proc(): print(`RandDT(P,d), RandDT1(P,d,AF) , Predict(P,T,v)`): print(`Breakup(P,S,f)`): end: #If there are k desriptive features, we name them 1, ..., k #and descriptive feature i has a[i] possible values (named 1, ...i) #and the target feature has N possible values #[[a[1], ..., a[k]],N]: #For example if "divisible by 2", "by 3" and by 5, and the target "is prime" #[[2,2,2],2] #RandDT1(P,d,AF): P=[L,N] inputs the type of the data and an integer d, outputs #a random decision tree of depth <=d, with vertex-labels fromAF RandDT1:=proc(P,d,AF) local L,N,T1,T,RootLabel,AF1,k,i: L:=P[1]: N:=P[2]: if d=0 then RETURN([rand(1..N)(),[]]): fi: RootLabel:=AF[rand(1..nops(AF))()]: AF1:=AF minus {RootLabel}: #k:=number of subtrees of descriptive feature RootLabel k:=L[RootLabel]: T:=[]: for i from 1 to k do T1:=RandDT1(P,d-1,AF1): T:=[op(T),T1]: od: [RootLabel,T]: end: #RandDT(P,d): P=[L,N] inputs the type of the data and an integer d, outputs #a random decision tree of depth <=d RandDT:=proc(P,d) local i: if d>nops(P[1]) then print(`The depth can be at most`, nops(P[1])): RETURN(FAIL): fi: RandDT1(P,d,{seq(i,i=1..nops(P[1]))}): end: #Predict(P,T,v): Inputs a profile P #and a decision tree T with that profile and a data vector v #outputs the prediction made by the tree regarding the target value of v Predict:=proc(P,T,v) local N,L,i,RootLabel,T1: L:=P[1]: N:=P[2]: #1<=v[i]<=L[i] #nops(v)=nops(L) if nops(v)<>nops(L) then RETURN(FAIL): fi: for i from 1 to nops(v) do if not (1<=v[i] and v[i]<=L[i]) then RETURN(FAIL): fi: od: if T[2]=[] then if not (T[1]>=1 and T[1]<=N) then RETURN(FAIL): else RETURN(T[1]): fi: fi: RootLabel:=T[1]: T1:=T[2][v[RootLabel]]: Predict(P,T1,v): end: #[v,Target] #Breakup(P,S,f): inputs a profile of a data-set and a data-set S of vectors of length #nops(P[1]) and a desription feature f( between 1 and nops(P[1]) outputs #the list of subsets according to the value of v[f] (there are P[1][f] of them) #Modified after class Breakup:=proc(P,S,f) local L,N,i, SS,v: L:=P[1]: N:=P[2]: if not (1<=f and f<=nops(P[1])) then RETURN(FAIL): fi: for i from 1 to L[f] do SS[i]:=[]: od: for i from 1 to nops(S) do v:=S[i][2]; SS[v[f]]:=[op(SS[v[f]]), S[i]]: od: [seq(SS[i],i=1..L[f])]: end: #end from C17.txt Dodam:=proc(N,k) local G: G:=subs({false=1,true=2},ABTP(N,k)): [seq([G[i][1], G[i][2]+[1\$k], G[i][3]],i=1..nops(G))]: end: #H(P,DS): the entropy according to the target feature of the data set DS #[Name,v,t] (1<=t<=N), N=P[2]; H:=proc(P,DS) local co,i,T,t,N,s: N:=P[2]: co:=[0\$N]: for i from 1 to nops(DS) do t:=DS[i][3]: co:=co+[0\$(t-1),1,0\$(N-t)]: od: s:=convert(co,`+`): co:=co/s: evalf(SEnt(co)): end: #IG(P,DS,f): the information gain in picking f as the root of the decision tree #Try: IG([[2,2,2],2],Dodam(20,3)),1); IG:=proc(P,DS,f) local k,DSlist,N: N:=P[2]: k:=nops(P[1]): if not (1<=f and f<=k) then RETURN(FAIL): fi: DSlist:=Breakup(P,DS,f): H(P,DS)- add( nops(DSlist[i])/nops(DS)*H(P,DSlist[i]),i=1..P[1][f]): end: #MakeDS(P,DS): implements the ID3 algorithm #inputs P=[L,N], and a data set #DS=[ ..., [Name, v,t], ...] MakeDS:=proc(P,DS) local i, T,TarS: #line 1 TarS:={seq(DS[i][3],i=1..nops(DS))}: if nops(TarS)=1 then RETURN([TarS[1],[]]): fi: if P[1]=[] then Majority(DS): fi: end: