###################################################################### ##SimpsonParadox.txt: Save this file as SimpsonParadox.txt # ## To use it, stay in the # ##same directory, get into Maple (by typing: maple ) # ##and then type: read SimpsonParadox.txt # ##Then follow the instructions given there # ## # ##Written by Doron Zeilberger, Rutgers University , # #zeilberg at math dot rutgers dot edu # ###################################################################### #Created: Nov. 2018 print(`Created: Nov. 2018`): print(` This is SimpsonParadox.txt `): print(`It is the Maple package that accompanies the article `): print(` Using Experimental Mathematics to Investigate the Amazing Simpson Paradox`): print(`by Shalosh B. Ekhad and Doron Zeilberger`): print(`and also available from Zeilberger's website`): print(``): print(`Please report bugs to zeilberg at math dot rutgers dot edu`): print(``): print(`The most current version of this package and paper`): print(` are available from`): print(`http://www.math.rutgers.edu/~zeilberg/ .`): print(`---------------------------------------`): print(`For a list of the Supporting procedures type ezra1();, for help with`): print(`a specific procedure, type ezra(procedure_name); .`): print(``): print(`---------------------------------------`): print(`---------------------------------------`): print(`For a list of the MAIN procedures type ezra();, for help with`): print(`a specific procedure, type ezra(procedure_name); .`): print(``): print(`---------------------------------------`): with(combinat): ezra1:=proc() if args=NULL then print(` The supporting procedures are: AreaR, GP1, S2, S2wZ `): print(``): else ezra(args): fi: end: ezra:=proc() if args=NULL then print(`The main procedures are: NuSi, NuSiE, S1, S1E, S1wZ, G, GwZ , Paper, PaperE, Zk`): print(` `): elif nops([args])=1 and op(1,[args])=AreaR then print(`AreaR(A1,A2,B1,B2,C): assuming that B2-B1 M2+W2.`): print(` For example, try: `): print(` G(10,15,m1,w1,m2,w2); `): elif nops([args])=1 and op(1,[args])=GP1 then print(`GP1(S1,x): Given a data set S1 guesses a polynomial P in x such that P(S1[i][1])=S1[i][2] for each member of S1. Try:`): print(`GP1({seq([i,i^2],i=10..20)},x);`): elif nops([args])=1 and op(1,[args])=GwZ then print(`GwZ(p,q,m1,w1,m2,w2): Like G(p,q,m1,w1,m2,w2) but also allowing 0 cures`): print(`the generating function of all occurences of Simpson's paradox with `): print(` Hospital A admitting p men and q women and `): print(` Hospital B admitting q men and p women. `): print(`It is the sum of all monomials m1^M1*w1^W1*m2^M2*w2^W2`): print(`0<=M1<=p, 0<=W1<=q, 0<=M2<=q, 0<=W2<=p`): print(`Hospital 1 cures M1 of its p admitted men`): print(`Hospital 1 cures W1 of its q admitted women`): print(`Hospital 2 cures M2 of its q admitted men`): print(` Hospital 2 cures W2 of its p admitted women `): print(` and Hospital 2 does better than Hospital 1 when viewed from the narrow (parochial) perspectives of both men and women `): print(` but Hospital 1 cures more people, in other words, `): print(` M1/p < M2/q `): print(` W1/q < W2/p but `): print(`M1+W1> M2+W2.`): print(` For example, try: `): print(` GwZ(10,15,m1,w1,m2,m2); `): elif nops([args])=1 and op(1,[args])=IsSim then print(`IsSim(L): Inputs # a 2 by 2 matrix whose entries are pairs of integers of the form L=[ [ [x11,p11],[x12,p12]], [[x21,p21], [x22,p22]] ]`): print(`decides whether it is a Simpson matrix, i.e. whether x11/p11(x12+x22)/(p12+p22).`): print(`Here we do not allow 0 or 1`): print(`For example, to confirm the example on p.2 of "Causal Inference in Statistics' by Judea Pearl, Madelyn Glymour, and Nicholas Jewell"`): print(`also quoted in wikipedia, type`): print(`IsSim( [ [ [234,270], [81,87]], [ [55,80], [192,263]] ] );`): elif nops([args])=1 and op(1,[args])=NuSi then print(`NuSi(a,b,n): inputs positive integers a and b and a symbol n, outputs a polynomial expression ( a polynomial of`): print(`degree 4) in n, giving the number of Simpson scenarios `): print(`[ [ [x1,a*n],[y1,b*n]], [[x2,b*n], [y2,a*n]] ]}. Try:`): print(`NuSi(2,3,n);`): elif nops([args])=1 and op(1,[args])=NuSiE then print(`NuSiE(a,b,n): Extended version of NuSi(a,b,n). Inputs positive integers a and b and a symbol n, outputs a polynomial expression ( a polynomial of`): print(`degree 4) in n, giving the number of Extended Simpson scenarios `): print(`[ [ [x1,a*n],[y1,b*n]], [[x2,b*n], [y2,a*n]] ]}. Try:`): print(`NuSiE(2,3,n);`): elif nops([args])=1 and op(1,[args])=Paper then print(`Paper(n,A,N): inputs a symbol, n, and a positive integer, N, and outputs an article with explicit expressions `): print(`for 1<=ax2+y2 `): print(`For example, try:`): print(`S1(10,5); `): elif nops([args])=1 and op(1,[args])=S1E then print(`S1E(p,q): Extended convention analog of S1(p,q). Inputs positive integers p and q and outputs all the pairs of pairs`): print(` [ [[x1,p],[y1,q]], [[x2,q],[y2,p]] such that 0<=x1<=p, 0<=y1<=q, 0<=x2<=q, 0<=y2<=p,`): print(`and x1/px2+y2 `): print(`For example, try:`): print(`S1E(10,5); `): elif nops([args])=1 and op(1,[args])=S1wZ then print(`S1wZ(p,q): inputs positive integers p and q and outputs all the pairs of pairs`): print(` [ [[x1,p],[y1,q]], [[x2,q],[y2,p]] such that 0<=x1<=p, 0<=y1<=q, 0<=x2<=q, 0<=y2<=p,`): print(`and x1/px2+y2 `): print(`For example, try:`): print(`S1wZ(10,5); `): elif nops([args])=1 and op(1,[args])=S2 then print(`S2(p,q): Like S1(p,q) but via generating functions. Much slower. `): print(`For example, try:`): print(`S2(10,5); `): elif nops([args])=1 and op(1,[args])=S2wZ then print(`S2wZ(p,q): Like S1wZ(p,q) but via generating functions. Much slower. `): print(`For example, try:`): print(`S2wZ(10,5); `): elif nops([args])=1 and op(1,[args])=Zk then print(`Zk(k): 1/24*((k-1)/k)^2, the asymptotic probability of a Simpson scenario with n men who took the drug,`): print(`k*n men who did not take the drug, k*n women who took the drug, and n wemen who did not take the drug.`): print(`Try:`): print(`Zk(k);`): else print(`There is no ezra for`,args): fi: end: ez:=proc(): print(` S1(p,q), S1wZ(p,q), G(p,q,m1,w1,m2,w2), GwZ(p,q,m1,w1,m2,w2), S2(p,q), S2wZ(p,q), `): end: ##############start GP1 #GP1(S1,x): Given a data set S1 guesses a polynomial P in x such that P(S1[i][1])=S1[i][2] for each member of S1. Try: #GP1({seq([i,i^2],i=10..20)},x); #GenPol1(x,dx,a): a generic polynomial of degree dx in x #with coeffs. parametrized by a[i] followed by the set #of coeffs. GenPol1:=proc(x,dx,a) local i: add(a[i]*x^i,i=0..dx),{seq(a[i],i=0..dx)}: end: #GP1d(S1,x,d): guesses a polynomial in x of degree d #that fits the data in DS1 GP1d:=proc(S1,x,d) local eq,var,P,a,var1,i: P:=GenPol1(x,d,a): var:=P[2]: P:=P[1]: if nops(S1)FAIL then RETURN(gu): fi: od: FAIL: end: ###############end GP1 S1:=proc(p,q) local gu,x1,x2,y1,y2: gu:={}: for x1 from 1 to p-1 do for y1 from trunc(q*x1/p) to q-1 do for x2 from 1 to q-1 do for y2 from trunc(p*x2/q) to p-1 do if x1/py1+y2 then gu:= gu union {[ [ [x1,p],[y1,q]], [[x2,q], [y2,p]] ]}: fi: od: od: od: od: gu: end: S1wZ:=proc(p,q) local gu,x1,x2,y1,y2: gu:={}: for x1 from 0 to p do for y1 from trunc(q*x1/p) to q do for x2 from 0 to q do for y2 from trunc(p*x2/q) to p do if x1/py1+y2 then gu:= gu union {[ [ [x1,p],[y1,q]], [[x2,q], [y2,p]] ]}: fi: od: od: od: od: gu: end: #GwZ(p,q,m1,w1,m2,w2): the generating function of all occurences of Simpson's paradox with #Hospital A admitting p men and q women and #Hospital B admitting q men and p women. #It is the sum of all monomials m1^M1*w1^W1*m2^M2*w2^W2 #Hospital 1 cures M1 of its p admitted men #Hospital 1 cures W1 of its q admitted women #Hospital 2 cures M2 of its q admitted men #Hospital 2 cures W2 of its p admitted women #and Hospital 2 does better than Hospital 1 when viewed from the narrow (parochial) perspectives of both men and women #but Hospital 1 cures more people, in other words, #M1/p < M2/q #W1/q < W2/p but #M1+W1> M2+W2. #For example, try: #GwZ(10,15,m1,w1,m2,m2) GwZ:=proc(p,q,m1,w1,m2,w2) local z1,z2,z3,gu1,gu2,gu3,gu4,gu,i: gu1:=expand(normal((1-(m1*z3/z1^q)^(p+1))/(1-m1*z3/z1^q))): gu2:=expand(normal((1-(w1*z3/z2^p)^(q+1))/(1-w1*z3/z2^p))): gu3:=expand(normal((1-(m2*z1^p/z3)^(q+1))/(1-m2*z1^p/z3))): gu4:=expand(normal((1-(w2*z2^q/z3)^(p+1))/(1-w2*z2^q/z3))): gu:=expand(gu1*gu2*gu3*gu4): gu:=add(coeff(gu,z1,i),i=1..degree(gu,z1)): gu:=subs(z1=1,gu): gu:=add(coeff(gu,z2,i),i=1..degree(gu,z2)): gu:=subs(z2=1,gu): gu:=add(coeff(gu,z3,i),i=1..degree(gu,z3)): gu:=subs(z3=1,gu): gu: end: S2wZ:=proc(p,q) local gu,m1,w1,m2,w2,i,mu,gu1: gu:=GwZ(p,q,m1,w1,m2,w2): mu:={}: for i from 1 to nops(gu) do gu1:=op(i,gu): mu:=mu union { [ [[degree(gu1,m1),p],[degree(gu1,m2),q]], [[degree(gu1,w1),q],[degree(gu1,w2),p]] ] }: od: mu: end: #G(p,q,m1,w1,m2,w2): the generating function of all occurences of Simpson's paradox with #Hospital A admitting p men and q women and #Hospital B admitting q men and p women. #It is the sum of all monomials m1^M1*w1^W1*m2^M2*w2^W2 #Hospital 1 cures M1 of its p admitted men #Hospital 1 cures W1 of its q admitted women #Hospital 2 cures M2 of its q admitted men #Hospital 2 cures W2 of its p admitted women #and Hospital 2 does better than Hospital 1 when viewed from the narrow (parochial) perspectives of both men and women #but Hospital 1 cures more people, in other words, #M1/p < M2/q #W1/q < W2/p but #M1+W1> M2+W2. #For example, try: #G(10,15,m1,w1,m2,m2) G:=proc(p,q,m1,w1,m2,w2) local z1,z2,z3,gu1,gu2,gu3,gu4,gu,i: gu1:=expand(m1*z3/z1^q*normal((1-(m1*z3/z1^q)^(p-1))/(1-m1*z3/z1^q))): gu2:=expand(w1*z3/z2^p*normal((1-(w1*z3/z2^p)^(q-1))/(1-w1*z3/z2^p))): gu3:= expand(m2*z1^p/z3*normal((1-(m2*z1^p/z3)^(q-1))/(1-m2*z1^p/z3))): gu4:=expand(w2*z2^q/z3*normal((1-(w2*z2^q/z3)^(p-1))/(1-w2*z2^q/z3))): gu:=expand(gu1*gu2*gu3*gu4): gu:=add(coeff(gu,z1,i),i=1..degree(gu,z1)): gu:=subs(z1=1,gu): gu:=add(coeff(gu,z2,i),i=1..degree(gu,z2)): gu:=subs(z2=1,gu): gu:=add(coeff(gu,z3,i),i=1..degree(gu,z3)): gu:=subs(z3=1,gu): gu: end: S2:=proc(p,q) local gu,m1,w1,m2,w2,i,mu,gu1: gu:=G(p,q,m1,w1,m2,w2): if gu=0 then RETURN({}): fi: mu:={}: if type(gu,`+`) then for i from 1 to nops(gu) do gu1:=op(i,gu): mu:=mu union { [ [[degree(gu1,m1),p],[degree(gu1,m2),q]], [[degree(gu1,w1),q],[degree(gu1,w2),p]] ] }: od: RETURN(mu): else gu1:=gu: RETURN( { [ [[degree(gu1,m1),p],[degree(gu1,m2),q]], [[degree(gu1,w1),q],[degree(gu1,w2),p]] ] }): fi: end: #AreaR(A1,A2,B1,B2): assuming that B2-B1(x12+x22)/(p12+p22). #For example, to confirm the example on p.2 of `Causal Inference in Statistics' by Judea Pearl, Madelyn Glymour, and Nicholas Jewell, type: #IsSim( [ [ [192,263],[55,80]], [ [81,87],[234,270]] ] ); IsSim:=proc(L) local i,j: for i from 1 to 2 do for j from 1 to 2 do if not (L[i][j][1]>0 and L[i][j][1] (L[1][2][1]+L[2][2][1])/(L[1][2][2]+L[2][2][2])) then RETURN(false): fi: true: end: #NuSi(a,b,n): inputs positive integers a and b and a symbol n, outputs a polynomial expression ( a polynomial of #degree 4) in n, giving the number of Simpson scenarios #[ [ [x1,a*n],[y1,b*n]], [[x2,b*n], [y2,a*n]] ]}. Try: #NuSi(2,3,n); NuSi:=proc(a,b,n) local gu,n1,lu: if not (type(a,integer) and type(b,integer) and a>0 and b>0 and gcd(a,b)=1 and type(n,symbol)) then print(`Bad input`): RETURN(FAIL): fi: gu:=[seq([n1,nops(S1(a*n1,b*n1))],n1=1..8)]: lu:=GP1({op(gu)},n): if lu<>0 and lu<>FAIL then RETURN(lu): fi: gu:=[op(2..8,gu),[9,nops(S1(a*9,b*9))]]: lu:=GP1({op(gu)},n): if lu<>0 and lu<>FAIL then RETURN(lu): fi: gu:=[op(2..8,gu),[10,nops(S1(a*10,b*10))]]: lu:=GP1({op(gu)},n): if lu<>0 and lu<>FAIL then RETURN(lu): fi: FAIL: end: #Zk(k): 1/24*((k-1)/k)^2, the asymtotic probability of a Simpson scenario #with n men who took the drug, k*n men who did not take the drug, #k*n women who took the drug, and n wemen who did not take the drug. #Try: #Zk(k); Zk:=proc(k) ((k-1)/k)^2/24: end: #Paper(n,A,N): inputs a symbol, n, anda symbol A, and a positive integer, N, and outputs an article with explicit expressions #for 1<=a1) then print(`Bad input`): RETURN(FAIL): fi: co:=0: gu:=[]: print(`Explicit Expressions for the Exact Number of Simpson Paradox Scenarios`): print(``): print(`By Shalosh B. Ekhad .`): print(``): print(`Definition: For a and b posivie integers, let`, A[a,b](n), `be the number of Simpson Paradox scenarios with`): print(``): print( a*n,`men who took the drug`, b*n, `men who did not take the drug`): print(``): print( b*n, `women who took the drug, and `, a*n, `wemen who did not take the drug.`): for a from 1 to N do for b from a+1 to N do if gcd(a,b)=1 then gu1:=NuSi(a,b,n): if gu1<>FAIL then gu:=[op(gu),[[a,b],gu1]]: co:=co+1: print(`Theorem Number`, co): print(``): print(A[a,b](n)=gu1): print(``): print(`and in Maple format`): print(``): lprint(A[a,b](n)=gu1): print(``): k:=b/a: print(`hence the asymptotic frequency of a Simpson scenario is`): print(``): lu:=coeff(gu1,n,4)/(a*b)^2: print(lu): if lu<>Zk(k) then print(`OMG, this condracits the Ekhad-Zeilberger theorem that it should be`, Zk(k)): else print(`confirming that it is ((k-1)/k)^2/24, where k=b/a=`,b/a): fi: fi: fi: od: od: print(`To sum up here is the data in Maple format`): lprint(gu): print(``): print(`------------------------------------------------------------`): print(``): print(`This ends this article that took`, time()-t0, `seconds. to generate.`): end: ###start with Extended convention S1E:=proc(p,q) local gu,x1,x2,y1,y2: gu:={}: for x1 from 0 to p do for y1 from trunc(q*x1/p) to q do for x2 from 0 to q do for y2 from trunc(p*x2/q) to p do if x1/py1+y2 then gu:= gu union {[ [ [x1,p],[y1,q]], [[x2,q], [y2,p]] ]}: fi: od: od: od: od: gu: end: #NuSiE(a,b,n): inputs positive integers a and b and a symbol n, outputs a polynomial expression ( a polynomial of #degree 4) in n, giving the number of Simpson scenarios with the extended convention (where the fractions 0 and 1 are allowed). try #[ [ [x1,a*n],[y1,b*n]], [[x2,b*n], [y2,a*n]] ]}. Try: #NuSiE(2,3,n); NuSiE:=proc(a,b,n) local gu,n1,lu: if not (type(a,integer) and type(b,integer) and a>0 and b>0 and gcd(a,b)=1 and type(n,symbol)) then print(`Bad input`): RETURN(FAIL): fi: gu:=[seq([n1,nops(S1E(a*n1,b*n1))],n1=1..8)]: lu:=GP1({op(gu)},n): if lu<>0 and lu<>FAIL then RETURN(lu): fi: gu:=[op(2..8,gu),[9,nops(S1E(a*9,b*9))]]: lu:=GP1({op(gu)},n): if lu<>0 and lu<>FAIL then RETURN(lu): fi: gu:=[op(2..8,gu),[10,nops(S1E(a*10,b*10))]]: lu:=GP1({op(gu)},n): if lu<>0 and lu<>FAIL then RETURN(lu): fi: FAIL: end: #PaperE(n,A,N): inputs a symbol, n, anda symbol A, and a positive integer, N, and outputs an article with explicit expressions #for 1<=a1) then print(`Bad input`): RETURN(FAIL): fi: co:=0: gu:=[]: print(`Explicit Expressions for the Exact Number of Extended Simpson Paradox Scenarios`): print(``): print(`By Shalosh B. Ekhad .`): print(``): print(`Definition: For a and b posivie integers, let`, A[a,b](n), `be the number of Extended Simpson Paradox scenarios with`): print(``): print( a*n,`men who took the drug`, b*n, `men who did not take the drug`): print(``): print( b*n, `women who took the drug, and `, a*n, `wemen who did not take the drug.`): for a from 1 to N do for b from a+1 to N do if gcd(a,b)=1 then gu1:=NuSiE(a,b,n): if gu1<>FAIL then gu:=[op(gu),[[a,b],gu1]]: co:=co+1: print(`Theorem Number`, co): print(``): print(A[a,b](n)=gu1): print(``): print(`and in Maple format`): print(``): lprint(A[a,b](n)=gu1): print(``): k:=b/a: print(`hence the asymptotic frequency of a Simpson scenario is`): print(``): lu:=coeff(gu1,n,4)/(a*b)^2: print(lu): if lu<>Zk(k) then print(`OMG, this condracits the Ekhad-Zeilberger theorem that it should be`, Zk(k)): else print(`confirming that it is ((k-1)/k)^2/24, where k=b/a=`,b/a): fi: fi: fi: od: od: print(`To sum up here is the data in Maple format`): lprint(gu): print(``): print(`------------------------------------------------------------`): print(``): print(`This ends this article that took`, time()-t0, `seconds. to generate.`): end: