#C17.txt, Decision trees, March 28, 2019 Help:=proc(): print(`RandDT(P,d), RandDT1(P,d,AF) , Predict(P,T,v)`): print(`Breakup(P,S,f)`): end: #If there are k desriptive features, we name them 1, ..., k #and descriptive feature i has a[i] possible values (named 1, ...i) #and the target feature has N possible values #[[a[1], ..., a[k]],N]: #For example if "divisible by 2", "by 3" and by 5, and the target "is prime" #[[2,2,2],2] #RandDT1(P,d,AF): P=[L,N] inputs the type of the data and an integer d, outputs #a random decision tree of depth <=d, with vertex-labels fromAF RandDT1:=proc(P,d,AF) local L,N,T1,T,RootLabel,AF1,k,i: L:=P[1]: N:=P[2]: if d=0 then RETURN([rand(1..N)(),[]]): fi: RootLabel:=AF[rand(1..nops(AF))()]: AF1:=AF minus {RootLabel}: #k:=number of subtrees of descriptive feature RootLabel k:=L[RootLabel]: T:=[]: for i from 1 to k do T1:=RandDT1(P,d-1,AF1): T:=[op(T),T1]: od: [RootLabel,T]: end: #RandDT(P,d): P=[L,N] inputs the type of the data and an integer d, outputs #a random decision tree of depth <=d RandDT:=proc(P,d) local i: if d>nops(P[1]) then print(`The depth can be at most`, nops(P[1])): RETURN(FAIL): fi: RandDT1(P,d,{seq(i,i=1..nops(P[1]))}): end: #Predict(P,T,v): Inputs a profile P #and a decision tree T with that profile and a data vector v #outputs the prediction made by the tree regarding the target value of v Predict:=proc(P,T,v) local N,L,i,RootLabel,T1: L:=P[1]: N:=P[2]: #1<=v[i]<=L[i] #nops(v)=nops(L) if nops(v)<>nops(L) then RETURN(FAIL): fi: for i from 1 to nops(v) do if not (1<=v[i] and v[i]<=L[i]) then RETURN(FAIL): fi: od: if T[2]=[] then if not (T[1]>=1 and T[1]<=N) then RETURN(FAIL): else RETURN(T[1]): fi: fi: RootLabel:=T[1]: T1:=T[2][v[RootLabel]]: Predict(P,T1,v): end: #[v,Target] #Breakup(P,S,f): inputs a profile of a data-set and a data-set S of vectors of length #nops(P[1]) and a desription feature f( between 1 and nops(P[1]) outputs #the list of subsets according to the value of v[f] (there are P[1][f] of them) #Modified after class Breakup:=proc(P,S,f) local L,N,i, SS,v: L:=P[1]: N:=P[2]: if not (1<=f and f<=nops(P[1])) then RETURN(FAIL): fi: for i from 1 to L[f] do SS[i]:=[]: od: for i from 1 to nops(S) do v:=S[i][2]; SS[v[f]]:=[op(SS[v[f]]), S[i]]: od: [seq(SS[i],i=1..L[f])]: end: