From 1aebad99cd42b7b48c9c5aa06b9b5afda3b3e40a Mon Sep 17 00:00:00 2001 From: priyadutt Date: Thu, 14 Dec 2023 12:50:10 +0530 Subject: [PATCH 1/2] added documentation for creating causal model out of equations feature Signed-off-by: priyadutt --- .../user_guide/modeling_gcm/causal_graph.png | Bin 0 -> 8356 bytes .../customizing_model_assignment.rst | 63 ++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 docs/source/user_guide/modeling_gcm/causal_graph.png diff --git a/docs/source/user_guide/modeling_gcm/causal_graph.png b/docs/source/user_guide/modeling_gcm/causal_graph.png new file mode 100644 index 0000000000000000000000000000000000000000..f3750620d5225d96099253698c3716457246f997 GIT binary patch literal 8356 zcmZv?c{~)_8$WKwmMz!5g&O-3D!Wi3MqvztvSdlFHQ9G2*(yY&GD!9zOGFyOWG!x7 z``E@v2*YS$FyVXL&+qsB>-)#NyqtN?bDr~lmh&u=hOji{Vi#p+U|`^an;9b+7$7p> zBXNQSto$?0C<6ZIgqS#l*xbDn67Ck{$zb6Y66kj~#LwIPbeLyQu=m}73o6!qF%eE1ijk(Ai zCr|w5kGr8Gb1y#K=3#=m6@Ii8u6yD*ez1Zp>>^EDcLL|8`u` zLH?O6RF83P;g=R<9c{Ab6y{&OA_e=L?Y(SJX4H@aImwj4l)+HWc*9ld-=o9}mxQ)p zTiRRlTdoFq&SOoV)68HRGCAx$BY%^b$QKBOL~nQQFyp3sPi}3nTuqv!K{FW|6X?HM z{j9$631oGrsRLT1(AEvI(JMZV*OTipIL>nX$6*X+RKrtW{FsXSS9{zGmP9&U$a4LWPC|ND8c%JjEOYfM+d1} z(68?eZ<3gi-O6|`2ZL=_D;~pbMvkGFNw2D;Mnhr94xh{%65C@|P4#;!)(%{!Zpc3i zOOJ-ZkVQUMR%3~tw9@siY37WK|9?qs*s6Uu{`Y85=^J8J$zgLhC9C{we-4%+bH~jskvu$ z9)4u6cm9_1v<}gC{oj-?j{iH_*mxc88NatZH*@y=QQ>EtL=bV!da;)_OP?DEyFs7+ zS=n;RFJt$>xRe>oR>ox<;rqFKd;NU4OWcp9thtgX!>=9m&S=#?BaiCp947Wg2UO>D zutS6l%7p1|9Bc$O{$S*hv9LKUj`mjE{_vn@Gd$*JPte(n(mZvRfLVg2QV)g{4X`1zUglxksUHa~dKll^Qwa zuBO*SvQg9mp4iquLzAYUbKpAZV(2%a zU;bHn8?V3}MKy(7KsPpcjYHZPoET@iHVP>~im;b>sx?L29s z<2NZBBFDG_)k|IwLi?hNbXGLqxn{tSWiPL=y+SBkK;xLM%aFFq-~>iO#jjS%tA($4 zNwydUZEm6rrz-Ou#$T9V>n4|nW{GR;G6_ZvO^kxqZBpf)%ew1*89M1Ghy~ydoaSk|oo%9(un zgM}pm7hz+t^#jS?SacL2|52eQXeD#mCd>Ht9q7=i~+yVllKZSka} z(V5Pzb;4N!cR{dH$>DXL$VM}xDrJhtzC9UFbSa-~(4C74_1IvFC3=qwY!}v|lpc(7 zDr5S&-#M|)vdoe(j--EvghV!BTxD-T)a26U(@r$!9*s&icsKB1GLt2$J`^j-5Df5p 
z`$EA6#s|z2*kF4#bam{kK^ZH?l23`R*Y{c{GP+MzDtQkBJ?g zV!OzFk>wBcy(t+ z9Gggg#R)=(t~Nx>DU;~{m*zgF#M~Fn9Yp{4O=V}^_Ck&x| zF~J;*n0&Kog&a{bzUX1nD4eJt5le_AD6h=kI{xOSLJo|KpXb5-2$#(b8=QAt_`ULe zStk1`d)bLdY76Ti)UqUK{6{!pe6ZE9S3@;R-CAA)O^Z9 z6ZwO$k>hk_LCX%LcA9*S`vRWIVrhcH!|uLDJhZM{(`Tz*n^WNIc!1g_m0?{(GWgBe z{=!CCb!)nBiYDhlCn4dLSo>tYB;@@ESkhtxeSPOdmrTu=B?^CDSw4qX-gaB!(Rg`I zb(Fnp0kWkEYd`(MNZ}(()f%4d{S6d;1Ta+it8>*%8B?C(hT1lFM9P<$bzn)q&{CaS zy5z7LY|GLH24-y|UpB^o!W**G+Mw!60gt6N8r{RxvM>1z*o9TdnKlN}a?c>2e4JLJ zLTgcgD$S_sO9VB%Ye#3hZtZ~CPP+o@0>7Aq%x6d8SKM0~9&>grVM!H>FY`Vj_|5$X zqmS&!r^x)TlsF9&-nn71_PnOW>eo4u<9Y)+_K*GDVUE`FTo;P5G|rlkTBmWk_C)et z<+Pei0DGBRf3%@Q_1XcGquur(R063Dcj7ningtSDxh;frOskcR^P59C9meUW0M@qJ z@M`&-+Z@H%aM`dPzzTPwdJS(1;P7LYnUVz;0=v&wLa-f6YW zzJSBC_S-5Y$Z?!XZ7V$5S{`n3j3!U@TC9F~6+9my4|g_Nm_6s9EiRmF57F{K;j6#C z%rDJ1f{XHYuAWfF1i3|=wHuEv!A6zYvCE`Arp2)cS^^-p9Z=L$;Ouw&=De+l@o0m< z5aH<)f0zJgo!Usau6$0Zw-Jh3kr19z{DpAQc3deD95xzBrFC{z#iOW03BF*>(MBFV z5J9Z~*MHJZRP6ac5@;W{iW z#&uq;jTh(*g~G4C0*ib=J-r#2lk8pCH4C<+?4~zo0N3vrN1n;q+pg>~B7(u#^jxh@lHp-9jP1A>Bq`fWyp84ou(Lu|t;LG^cKvxbt+rl);Pt))G@@oVPJgCWgC((m zLty^`h)%hts5#ZH;rn!2bZT46(v9HKMkKl=IP_w3&00Y^xOO>LBSJa7C#vEk!%5b1 zkBZlG3o>E}0tCqhr3@>G)l%@(yDykLBX}trFg<#MqeC;`*QL{PCL>bdVHszjeDmes z2Sok&0^-S??S-O&;wK+9pJcGd&oez{*!*~?_i)ci^CyF~e46_3X%s#~>ZGt=3*Ckw1m2!T>$WN&Sdx+0@(g$kv^nco0^FZqNn6pTkY5GCzSzB*z>weG zWDer0eWOppfT`s?wATA~CA?UgH}S9YDIN>zQk-?n0n7ojAFz|rM`s8gVj4q|J|Eja zqCl94J;^fR>TB{vomV(zBghcER1@L}>@r!;fqjDL*!6FetBP8mjy?OMWy8*l`e$y= zR!L}59YWa?E$Qy-pp>t@?f>G$MMlhvA=r?^0(5-0Gi!M^& zzZT7K3!43#pFD}a21<Wl--WE7o9Ky*tJj>}#O4 zvSMYr#e*kegt&KL;QCP(eHjL@hDqzbT7&J8yZC!E{dMi}dt<0Nm5u3@4XU%quxSV+ zMXej9eN6p@Aj%9o=SjnBSx&`L(;2w`*>edSufB#0)A_r1ZPDOYS)SB>Di`sMT;j;f zWA3ln*5Rs#xL0(cd3vwPoJZew4CQTg;ZTUNAfG-F!&(LuR;QvUbCmUcswpjBwe@eB zh1f(mW|z5T_biSgu~hz#i=vge$P?i2c-Z_8Psa!hSMXWxKVs?@CozX+`!yc<%*kj@0mYLrr6BmD$tjj?{(j{i}4!TEQ6VDZX zT)rrOd?!4VVkU6ww~zAfmm#^|+dCtyFQN|@Z_QVVO$E?f0x)r3y?;sVrdqmwG6~-J zE(otOYWU4fSZN?_sz2v+${O}wi?`$jHVQb_gzwUrXB00zM3O=c&Zk_NOOf(;j 
zf0F9ADpM2GthAYCTgq`}HGD2Cne=pB+VopKovWM%6#Pb1~4%ATPFN`W8YyO*$tyz+mfi zbBC8?&AWU2xpFTrGCD0|aNN5>lox>|Uter!rnA^V+MnGTS) zuy5*T{zOVHG|3kYsHpN^y9GUb+COZI(XH2sR{@c!Y2w%@{-VapZCxf`bX`&Vqqu|U zM-gu#xtrO3AXl7@aGUXLvnMeb`aue{uKlhbBRvfA4nn_l)KuSv{rCmSrV54?*GDt1 z&J&aFrY81o-0a^Wh0F#^m}vKP8i)i^XeIPw{g;Pt4)1%_Gw+F^XpL~0y<+*^c1mqJsahJBVZEC*kcy*gvLcA1(H z?VA02ce{iscG;kTQ@3)W>Uy#C2zb|i5c=xt<%OCKL^w+3PYP<1=${GwZy0^mK!sXB zXsIXr_UhWO?#wrE?xw`3&=DnfLFDs>P!EjBoXSsnlNVruS2H<7zm2AXnn&V2zn;ZE zt{lthg(K;NP7scqI+TkBYQ4AgkKbXNNa{QkljO{!L_HJ5^nO~07NN9)lz(=zS8&9t zHzQEfJ@Zd(&)*dFH>}T7=5FiVHbtHVuek*%LhdsYs%)_)5=?je*ReO0z$*$TTvt}E zfaKS2T#j`h=48Ist@IO9Gz^R8Y5pE(`i7o~1N#;__g-RGmwvF=|)g#J)Ph6-TuDNyrnmzLv#1z^JN5I zh(=JkJ3GINyZtS-#Yc_;eBLjQAE!B}Ez(K?N5cP}#K6ShfKTURstS$Zw;2`+8@n3R zUh*VQx;u|*>8{vyYqW%wBY3CGPn5KHfX=C~OM33_c?!l+U~)E@5r|91QyEA}Z$vhP zlI@{d&=#E)SZlfw3rPChR38lAa}eGTNzU^KZyO379FI~;R=-p%-DOva(TD8vhvArs zCzq!P;)AVR=PdjkY~(-J^C@~+b%wc8`IGZp-G1AwI5{uf=H*Jt<`gRq3X=^hp`3J! zI2Xtjm>C=peBI`09KZR8cg{5VS}d zlG``}o`h1sjFnCof+JhxG<(Q}Cc5^@`mkiRYldGi*@ zs!Q`?T!Q%$WvD?d>NuSTsbO{miLT#3AIT^|>d;QndEla!z($^aphU^Vtr-upGeyaA%< zb?Y@!2d7Y8l{RKkoMZGqOZRIH3gZD5T==|Pc0?@+`7naohp567oJuZYyi?qN!<&!2 zjQ>P#qhweAxa@<4ih4DmtR?}_SV-!hvT}XbxYR$j=Pz=eC~j9{cU-HV1vCuQ14rQg@UoyCi1OpY$G$GGdkFa+v)o45-Ezc3 z5c9Qaf_9jiYcK^%7)Rm7F=O`%gwGl3fs2Fib{z)K_udYWUj46ya?p@}JCo}QkYZ}E zRR*|*bQ-_8#W%bm6GzEmZlhr@xON&imkj`R-`{qy7PtjY8I*9P2}TOA;-w#EluyU{ z`r@!e=?55OE3%idY*zzk(N^QE>s0i*YrRxs=Fe{)D*7rYjM+{n8V8JitptI!K2bt* z%i9^qZ3MZjHE0BcDVK zJ~WmBgO@h3L5`jg1LE9fA4JbRuUoIm@09c(JmYsZLn-u1gGExya`04{MZT*5fOF(# zRtmD%VGT-RbUJbBanhhJCAAW(11YJ;eWNGb^3bU37+QIIR_OvqZexYJR{u^Nrhs|2 zy8lMja_aFYWrUNgM>y&5swQCT>;=N^mdh`6Jm{o85{((U%!SYay{4-j$bk~GU=v=@ z=J!_LvLY%B-tkM9_u=b|7D~{umeh!(_PLe$0FM;{F;grl%gk5(l)2m7--69|njZGw zw7wAH&hafCY5*>KExmn#0fawS5dNx{tV`ehZ_XnAJW=4OMikcWQ5u1lya>iPwPH7~ z0DmQzR@x38LO4qL_^-l#RO%fIe1=%Nbni5!5f4nY9~o%lq7>Vwv*M*b}9cw6LK zgmNg<9$JFFocM~_rAzaC6#3Ci6fikO-~>Fx+_qb8ZoelduG<)6Q9~J<9B6z(?yzN9 zfD-yCkf|+3CHhumU_$>?H0Y}6C}lfo6tshyw9= 
zZnyJ|%2DT+YE^3G#)AWE~fui`U_RSQW4m;9! zC$m^a&>L>LK3#1}xCD#A)}?bpaAP0aftMqZ#8m`x+x?^Zs7|>5?9<~eeQ2pIn9VVA zd*mHGjluQEQv~x`T7E7&^Q2jt=GN(n;4p>1RrjMPveQzFg{i4y71M|)p)dg@uB%9e zO`)uuD$SgeA2W!u!XHy8 zdP~(ua{Y4?`Bk>Whh`6L`ON1~?X|K`!wZ{NKaCDKva5l5#bxeZQ6hF@tJaS8uPRSg zX_c%WjJxzLT+edugxbB7f|9t4){++5ANaDCt7SpPcI&75XT-s%jj{A)FyNVl4@s20 z<^1t%D9x-Ox$R3SLU|(z72otW5y!LCcV7I-kmvR*GXUzc*=nSsRf9#MHKCDRIx9r< z{NF!Y{kXT(kB2bUgp&VnDljUk@Yl23u?aWjOprCtlza@kG?B6z&wC*p z=)M1T+lR^@w`vGWMDbntC^{Dxd-l|Lnwk0d2io^2G8^UI6#Trzdks(~pQ0HXm!8u| zLdq{5nNrN1kd}+R3+|%2-X4^m9xq349SB-lVBdParV%U$NM!_ zat41E3=|o6&j&&#ZZ)aAj^w(dNBK@s+H>0JJG*<4$GbS$sOq?&XLUl0lUtO*bO$w zOW#+JgJlO=aF_YB*>8MkNY4~YuyAX6pUN5B@E|+&)1|NN7RJW6IsPAmc2M$U8Sfp< Zew_%+{wA>selfrRH?cIXHFCZGe*j@5I_dxb literal 0 HcmV?d00001 diff --git a/docs/source/user_guide/modeling_gcm/customizing_model_assignment.rst b/docs/source/user_guide/modeling_gcm/customizing_model_assignment.rst index 926cb4d053..fef6f925e0 100644 --- a/docs/source/user_guide/modeling_gcm/customizing_model_assignment.rst +++ b/docs/source/user_guide/modeling_gcm/customizing_model_assignment.rst @@ -119,3 +119,66 @@ Now we can use this in our ANMs instead: features internally based on their **alphabetical order**. For instance, in case of the MyCustomModel above, if the names of the input features are 'X2' and 'X1', the model should expect 'X1' in the first input and 'X2' in the second column. + +Creating causal model (GCM) from equations +------------------------------------------------------ + + +In the above section, we saw how ground truth models can be created and used for a node. Now in cases where we know the ground truth for almost all of the nodes and we want to create a custom causal model out of it without writing a lot of code. +That is when creating a graphical causal model (GCM) from equations serves as a robust utility, enabling the generation of a causal model by defining relationships between nodes. 
+This functionality proves highly valuable when the inter-node relationships are known, providing a means to construct a custom causal model. In this section, we'll dive deeper into how to use this feature. + + + + +**Defining Equations:** + - The functionality supports three equation formats: a root node equation, a non-root node equation, and an equation for an unknown causal relationship. + - Structure for each node type: + 1. Root Node + <node_name> = :math:`N_i` + 2. Non-root Node + <node_name> = :math:`f_i(PA_i) + N_i` + 3. Unknown relationship of node with its parent nodes + <node_name> -> PA_i,... + + - Note that in the above structure, :math:`N_i` is the noise model and :math:`f_i(PA_i)` is the functional causal model, i.e., a function that defines the relationship between the current node and its parent nodes. + - The root node equation defines the relationship for a root node by specifying a noise model. The non-root node equation extends this by incorporating a function expression involving other nodes and a noise model. The unknown causal model equation is used when the exact relationship between nodes is unknown; it only specifies the edges. + +**Defining Noise Models (N):** + - The noise models include options like empirical, Bayesian Gaussian mixture, parametric, and those from the `scipy.stats` library. Let's look at each option in detail: + 1. empirical(): A stochastic model that samples from the empirical distribution of the observed data. + 2. bayesiangaussianmixture(): A stochastic model that approximates the data distribution with a Bayesian Gaussian mixture model. + 3. parametric(): Use it when you want the system to find the best continuous distribution for the data. + 4. <scipy.stats function>(): You can specify continuous distribution functions defined in the `scipy.stats <https://docs.scipy.org/doc/scipy/reference/stats.html>`_ library. 
+ +**Defining Functional Causal Models (F(X)):** + - Relationships between child and parent nodes can be defined in an expression which supports almost all the arithmetic operations and functions under the `numpy <https://numpy.org/doc/stable/reference/routines.math.html>`_ library. + +**Undefined/Unknown relationships for Nodes:** + - When the relationship between the child and parent nodes is unknown, the user can define such nodes as shown in the example below: + :math:`X_i -> PA_i, PA_i` + +**Example** + - Users can provide a string containing equations representing the causal relationships between nodes. + +.. code-block:: python + + from dowhy import gcm + from dowhy.utils import plot + + scm = """ + X = empirical() + Y = norm(loc=0, scale=1) + Z = 12 * X + log(Y) + norm(loc=0, scale=1) + """ + causal_model = gcm.create_causal_model_from_equations(scm) + print(plot(causal_model.graph)) +.. image:: causal_graph.png + :width: 80% + :align: center + +| + +.. note:: + - The functionality sanitizes the input equations to prevent security vulnerabilities. + - The naming of the nodes is currently restricted to Python variable naming constraints, which means that the name of a node can only contain letters, digits (not at the start) and the '_' character. 
From ce8c1a356828dc34a000984fc6daa68f07782d05 Mon Sep 17 00:00:00 2001 From: priyadutt Date: Thu, 14 Dec 2023 20:01:01 +0530 Subject: [PATCH 2/2] Giving only positive values to the log function Signed-off-by: priyadutt --- .../user_guide/modeling_gcm/customizing_model_assignment.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user_guide/modeling_gcm/customizing_model_assignment.rst b/docs/source/user_guide/modeling_gcm/customizing_model_assignment.rst index fef6f925e0..4bc5e36123 100644 --- a/docs/source/user_guide/modeling_gcm/customizing_model_assignment.rst +++ b/docs/source/user_guide/modeling_gcm/customizing_model_assignment.rst @@ -169,7 +169,7 @@ This functionality proves highly valuable when the inter-node relationships are scm = """ X = empirical() Y = norm(loc=0, scale=1) - Z = 12 * X + log(Y) + norm(loc=0, scale=1) + Z = 12 * X + log(abs(Y)) + norm(loc=0, scale=1) """ causal_model = gcm.create_causal_model_from_equations(scm) print(plot(causal_model.graph))