Skip to content

Some examples of do-calculus

Creative Commons LicenseaGrUMinteractive online version
from IPython.display import display, Math
import pyagrum as gum
import pyagrum.lib.notebook as gnb
import pyagrum.causal as csl
import pyagrum.causal.notebook as cslnb
bn = gum.fastBN("w->x->z->y;w->z")
bn.cpt("w")[:] = [0.7, 0.3]
bn.cpt("x")[:] = [[0.4, 0.6], [0.3, 0.7]]
bn.cpt("z")[{"w": 0, "x": 0}] = [0.2, 0.8]
bn.cpt("z")[{"w": 0, "x": 1}] = [0.1, 0.9]
bn.cpt("z")[{"w": 1, "x": 0}] = [0.9, 0.1]
bn.cpt("z")[{"w": 1, "x": 1}] = [0.5, 0.5]
bn.cpt("y")[:] = [[0.1, 0.9], [0.8, 0.2]]
d = csl.CausalModel(bn, [("lat1", ["x", "y"])])
## csl.causalImpact(d,"y",{"x":0})
cslnb.showCausalImpact(d, "y", "x", values={"x": 0})
cslnb.showCausalImpact(d, "y", "x", values={"x": 1})
lat1 x x lat1->x y y lat1->y w w w->x z z w->z x->z z->y
Causal Model
P(ydo(x))=w,z(xP(xw)P(yw,x,z))P(zw,x)P(w)\begin{equation*}P( y \mid \text{do}(x)) = \sum_{w,z}{\left(\sum_{x'}{P\left(x'\mid w\right) \cdot P\left(y\mid w,x',z\right)}\right) \cdot P\left(z\mid w,x\right) \cdot P\left(w\right)}\end{equation*}


Explanation : Do-calculus computations

y
0
1
0.51300.4870

Impact
lat1 x x lat1->x y y lat1->y w w w->x z z w->z x->z z->y
Causal Model
P(ydo(x))=w,z(xP(xw)P(yw,x,z))P(zw,x)P(w)\begin{equation*}P( y \mid \text{do}(x)) = \sum_{w,z}{\left(\sum_{x'}{P\left(x'\mid w\right) \cdot P\left(y\mid w,x',z\right)}\right) \cdot P\left(z\mid w,x\right) \cdot P\left(w\right)}\end{equation*}


Explanation : Do-calculus computations

y
0
1
0.64600.3540

Impact

Since we have the formula, let us compute by hand this intervention :

(((bn.cpt("x") * bn.cpt("y")).sumOut(["x"]) * bn.cpt("w") * bn.cpt("z")).sumOut(["z", "w"])).putFirst("y")
y
x
0
1
0
0.51300.4870
1
0.64600.3540
bn = gum.fastBN("Z1->X->Z2->Y")
d = csl.CausalModel(bn, [("L1", ["Z1", "X"]), ("L2", ["Z1", "Z2"]), ("L3", ["Z1", "Y"]), ("L4", ["Y", "X"])], True)
cslnb.showCausalImpact(d, "Y", "X", values={"X": 1})
L1 Z1 Z1 L1->Z1 X X L1->X L2 L2->Z1 Z2 Z2 L2->Z2 L3 L3->Z1 Y Y L3->Y L4 L4->X L4->Y Z1->X X->Z2 Z2->Y
Causal Model
Hedge Error: G={'Z2', 'Z1', 'X'}, G[S]={'Z2'}
Impossible
No result
Impact
modele4 = gum.BayesNet()
modele4.add(gum.LabelizedVariable("Smoking"))
modele4.add(gum.LabelizedVariable("Cancer"))
modele4.add(gum.LabelizedVariable("Tar"))
modele4.addArc(0, 2)
modele4.addArc(2, 1)
modele4.addArc(0, 1)
## Smoking
modele4.cpt(0)[:] = [0.5, 0.5]
## Tar
modele4.cpt(2)[{"Smoking": 0}] = [0.4, 0.6]
modele4.cpt(2)[{"Smoking": 1}] = [0.3, 0.6]
## Cancer
modele4.cpt(1)[{"Smoking": 0, "Tar": 0}] = [0.1, 0.9] # No Drug, Male -> healed in 0.8 of cases
modele4.cpt(1)[{"Smoking": 0, "Tar": 1}] = [0.15, 0.85] # No Drug, Female -> healed in 0.4 of cases
modele4.cpt(1)[{"Smoking": 1, "Tar": 0}] = [0.2, 0.8] # Drug, Male -> healed 0.7 of cases
modele4.cpt(1)[{"Smoking": 1, "Tar": 1}] = [0.25, 0.75]
d4 = csl.CausalModel(modele4, [("Genotype", ["Smoking", "Cancer"])], False)
cslnb.showCausalModel(d4)

svg

try:
a = csl.doCalculusWithObservation(d4, "Cancer", {"Smoking"})
except csl.HedgeException as h:
print(h.message)
display(Math(a.toLatex()))

P(Cancerdo(Smoking))=TarP(TarSmoking)(SmokingP(Smoking)P(CancerSmoking,Tar))\displaystyle P( Cancer \mid \text{do}(Smoking)) = \sum_{Tar}{P\left(Tar\mid Smoking\right) \cdot \left(\sum_{Smoking'}{P\left(Smoking'\right) \cdot P\left(Cancer\mid Smoking',Tar\right)}\right)}

try:
adjj = a.eval()
except csl.UnidentifiableException as u:
print(u.message)
print(adjj)
|| Cancer |
Smokin||0 |1 |
------||---------|---------|
0 || 0.1774 | 0.8226 |
1 || 0.1626 | 0.7374 |
formula, adj, exp = csl.causalImpact(d4, "Cancer", "Smoking", values={"Smoking": 0})
display(Math(formula.toLatex()))
adj

P(Cancerdo(Smoking))=TarP(TarSmoking)(SmokingP(CancerSmoking,Tar)P(Smoking))\displaystyle P( Cancer \mid \text{do}(Smoking)) = \sum_{Tar}{P\left(Tar\mid Smoking\right) \cdot \left(\sum_{Smoking'}{P\left(Cancer\mid Smoking',Tar\right) \cdot P\left(Smoking'\right)}\right)}

Cancer
0
1
0.17740.8226
m = gum.fastBN("z2->x->z1->y;z2->z1;z2->z3->y")
m.cpt("z2")[:] = [0.5, 0.5]
m.cpt("x")[:] = [
[0.4, 0.6], # z2=0
[0.4, 0.6],
] # z2=1
m.cpt("z3")[:] = [
[0.3, 0.7], # z2=0
[0.3, 0.7],
] # z2=1
m.cpt("z1")[{"z2": 0, "x": 0}] = [0.2, 0.8]
m.cpt("z1")[{"z2": 0, "x": 1}] = [0.25, 0.75]
m.cpt("z1")[{"z2": 1, "x": 0}] = [0.1, 0.9]
m.cpt("z1")[{"z2": 1, "x": 1}] = [0.15, 0.85]
m.cpt("y")[{"z1": 0, "z3": 0}] = [0.5, 0.5]
m.cpt("y")[{"z1": 0, "z3": 1}] = [0.45, 0.55]
m.cpt("y")[{"z1": 1, "z3": 0}] = [0.4, 0.6]
m.cpt("y")[{"z1": 1, "z3": 1}] = [0.35, 0.65]
d = csl.CausalModel(m, [("X-Z2", ["x", "z2"]), ("X-Z3", ["x", "z3"]), ("X-Y", ["x", "y"]), ("Y-Z2", ["y", "z2"])], True)
cslnb.showCausalModel(d)

svg

try:
formula, result, msg = csl.causalImpact(d, on={"y", "z2", "z1", "z3"}, doing={"x"})
except csl.HedgeException as h:
print(h.message)
print(msg)
display(Math(formula.toLatex()))
Do-calculus computations

P(z3,z2,z1,ydo(x))=P(z3z2)P(z1x,z2)P(z2)xP(xz2)P(z2)P(z3x,z2)P(yx,z1,z2,z3)x,yP(xz2)P(z2)P(z3x,z2)P(yx,z1,z2,z3)\displaystyle P( z3,z2,z1,y \mid \text{do}(x)) = P\left(z3\mid z2\right) \cdot P\left(z1\mid x,z2\right) \cdot P\left(z2\right) \cdot \frac {\sum_{x'}{P\left(x'\mid z2\right) \cdot P\left(z2\right) \cdot P\left(z3\mid x',z2\right) \cdot P\left(y\mid x',z1,z2,z3\right)}}{\sum_{x',y'}{P\left(x'\mid z2\right) \cdot P\left(z2\right) \cdot P\left(z3\mid x',z2\right) \cdot P\left(y'\mid x',z1,z2,z3\right)}}

## computation for this formula directly in pyAgrum
f1 = m.cpt("x") * m.cpt("z2") * m.cpt("z3") * m.cpt("y")
f2 = f1.sumOut(["x"])
f3 = f1.sumOut(["x", "y"])
f4 = f2 / f3
pyResult = m.cpt("z3") * m.cpt("z1") * m.cpt("z2") * f4
## computation for this formula directly by creating the causal AST
a = csl.ASTposteriorProba(m, {"z1"}, {"x", "z2"})
b = csl.ASTposteriorProba(m, {"y", "z3"}, {"x", "z1", "z2"})
c = csl.ASTjointProba(["x", "z2"])
correct = csl.ASTmult(a, csl.ASTsum(["x"], csl.ASTmult(b, c)))
print("According to [ref], the result should be :")
display(Math(correct.toLatex()))
According to [ref], the result should be :

P(z1x,z2)(xP(y,z3z1,z2)P(x,z2))\displaystyle P\left(z1\mid x,z2\right) \cdot \left(\sum_{x}{P\left(y,z3\mid z1,z2\right) \cdot P\left(x,z2\right)}\right)

## computation for that formula
ie = gum.LazyPropagation(m)
refResult = (ie.evidenceJointImpact(["y", "z3"], ["x", "z1", "z2"]) * ie.evidenceJointImpact(["x", "z2"], [])).sumOut(
["x"]
) * m.cpt("z1")
print(
"Maximum error between these 3 versions : {}".format(
max((refResult - pyResult).abs().max(), (refResult - result).abs().max(), (pyResult - result).new_abs().max())
)
)
Maximum error between these 3 versions : 5.551115123125783e-17
m1 = gum.fastBN("z1->x->z2->y")
cdg = csl.CausalModel(
m1, [("Z1−X", ["z1", "x"]), ("Z1-Y", ["z1", "y"]), ("Z1-Z1", ["z1", "z2"]), ("X−Y", ["x", "y"])], True
)
cslnb.showCausalModel(cdg)

svg

err = cslnb.showCausalImpact(cdg, "y", "x", values={"x": 0})
Z1−X z1 z1 Z1−X->z1 x x Z1−X->x Z1-Y Z1-Y->z1 y y Z1-Y->y Z1-Z1 Z1-Z1->z1 z2 z2 Z1-Z1->z2 X−Y X−Y->x X−Y->y z1->x x->z2 z2->y
Causal Model
Hedge Error: G={'x', 'z2', 'z1'}, G[S]={'z2'}
Impossible
No result
Impact
## EXEMPLE PAGE 17 : <http://ftp.cs.ucla.edu/pub/stat_ser/r350.pdf>
m1 = gum.BayesNet()
m1.add(gum.LabelizedVariable("x"))
m1.add(gum.LabelizedVariable("y"))
m1.add(gum.LabelizedVariable("z1"))
m1.add(gum.LabelizedVariable("z2"))
m1.add(gum.LabelizedVariable("z3"))
m1.addArc(2, 4)
m1.addArc(2, 0)
m1.addArc(3, 4)
m1.addArc(3, 1)
m1.addArc(4, 1)
m1.addArc(4, 0)
m1.addArc(0, 1)
gnb.showBN(m1)
d = csl.CausalModel(m1)

svg

display(Math(csl.identifyingIntervention(d, {"z1", "z2", "z3", "y"}, {"x"}).toLatex()))

P(z3z1,z2)P(z2)P(z1)P(yx,z2,z3)\displaystyle P\left(z3\mid z1,z2\right) \cdot P\left(z2\right) \cdot P\left(z1\right) \cdot P\left(y\mid x,z2,z3\right)

display(Math(csl.identifyingIntervention(d, {"y"}, {"x"}).toLatex()))

z1,z2,z3P(z3z1,z2)P(z2)P(z1)P(yx,z2,z3)\displaystyle \sum_{z1,z2,z3}{P\left(z3\mid z1,z2\right) \cdot P\left(z2\right) \cdot P\left(z1\right) \cdot P\left(y\mid x,z2,z3\right)}

display(Math(csl.identifyingIntervention(d, {"z1", "z3", "y"}, {"x", "z2"}).toLatex()))

P(z3z1,z2)P(z1)P(yx,z2,z3)\displaystyle P\left(z3\mid z1,z2\right) \cdot P\left(z1\right) \cdot P\left(y\mid x,z2,z3\right)

display(Math(csl.identifyingIntervention(d, {"y"}, {"x", "z2"}).toLatex()))

z1,z3P(z3z1,z2)P(z1)P(yx,z2,z3)\displaystyle \sum_{z1,z3}{P\left(z3\mid z1,z2\right) \cdot P\left(z1\right) \cdot P\left(y\mid x,z2,z3\right)}

## <http://www.stats.ox.ac.uk/~lienart/gml15_causalinference.html>
m1 = gum.BayesNet()
m1.add(gum.LabelizedVariable("a"))
m1.add(gum.LabelizedVariable("p"))
m1.add(gum.LabelizedVariable("b"))
m1.add(gum.LabelizedVariable("y"))
m1.addArc(0, 1)
m1.addArc(1, 2)
m1.addArc(0, 3)
m1.addArc(1, 3)
m1.addArc(2, 3)
gnb.showBN(m1)
d = csl.CausalModel(m1)

svg

display(Math(csl.identifyingIntervention(d, {"y"}, {"a", "b"}).toLatex()))

pP(ya,b,p)P(pa)\displaystyle \sum_{p}{P\left(y\mid a,b,p\right) \cdot P\left(p\mid a\right)}

## <https://cse.sc.edu/~mgv/talks/AIM2010.ppt> , example (f)
m1 = gum.BayesNet()
m1.add(gum.LabelizedVariable("X"))
m1.add(gum.LabelizedVariable("Y"))
m1.add(gum.LabelizedVariable("Z1"))
m1.add(gum.LabelizedVariable("Z2"))
m1.addArc(0, 1)
m1.addArc(0, 2)
m1.addArc(2, 3)
m1.addArc(3, 1)
m1.addArc(2, 1)
d = csl.CausalModel(m1, [("l1", ["X", "Z1"]), ("l2", ["Y", "Z1"])], True)
cslnb.showCausalModel(d)

svg

try:
display(Math(csl.identifyingIntervention(d, {"Y"}, {"X"}).toLatex()))
except csl.HedgeException as e:
print("Hedge exception : {}".format(e))
Hedge exception : Hedge Error: G={'Z1', 'X', 'Y'}, G[S]={'Z1', 'Y'}
bn = gum.fastBN("Z1->Z2->Z3->Y<-X->Z2;Z2->Y;Z1->X->Z3<-Z1")
gnb.showBN(bn)

svg

c = csl.CausalModel(bn, [("Z0", ("X", "Z1", "Z3"))], False)
cslnb.showCausalModel(c)

svg

formula, impact, explanation = csl.causalImpact(c, "Y", "X")
cslnb.showCausalImpact(c, "Y", "X")
Z0 Z1 Z1 Z0->Z1 Z3 Z3 Z0->Z3 X X Z0->X Z2 Z2 Z1->Z2 Z2->Z3 Y Y Z2->Y Z3->Y X->Z2 X->Y
Causal Model
P(Ydo(X))=Z1,Z2,Z3P(YX,Z2,Z3)P(Z2X,Z1)(XP(Z1)P(Z3X,Z1,Z2)P(XZ1))\begin{equation*}P( Y \mid \text{do}(X)) = \sum_{Z1,Z2,Z3}{P\left(Y\mid X,Z2,Z3\right) \cdot P\left(Z2\mid X,Z1\right) \cdot \left(\sum_{X'}{P\left(Z1\right) \cdot P\left(Z3\mid X',Z1,Z2\right) \cdot P\left(X'\mid Z1\right)}\right)}\end{equation*}


Explanation : Do-calculus computations

Y
X
0
1
0
0.57970.4203
1
0.55210.4479

Impact