From ab48dcab4e117d85d726e7026db9b62c1ea98a61 Mon Sep 17 00:00:00 2001 From: Abhinav Bhatele Date: Sat, 26 Aug 2023 14:11:56 -0400 Subject: [PATCH 1/3] add AxoNN logo (#34) * add AxoNN logo * Update README.md --- README.md | 2 +- logo.png | Bin 0 -> 39282 bytes 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 logo.png diff --git a/README.md b/README.md index bbf7b28..cd65d4a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# AxoNN +# hatchet AxoNN [![rtx-3090 tests](https://github.com/hpcgroup/axonn/actions/workflows/nvidia-tests.yaml/badge.svg)](https://github.com/hpcgroup/axonn/actions/workflows/nvidia-tests.yaml) [![docs](https://readthedocs.org/projects/axonn/badge/?version=latest)](https://axonn.readthedocs.io/en/latest/?badge=latest) diff --git a/logo.png b/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..c3cfd99610e271b79ddcb93ca3573bbd28257a33 GIT binary patch literal 39282 zcmXtgcRbba`~T}4d+$~Dj%>2G$S8+wGD1d??978>%iiG}nHdqv%50Fz%-$h;WN*LQ z`}6(%!Qv~?#>$*iF9%zsd-XMe^h(z_&1!eJHzwt-@=7QTkb8Reh zh%%M)R?9fge6-!}eIO{-JRiQ|jAx6F*HE6D@!@^Tma~cYoe@+S>LoD>VTsEbVQkzf zLED0Cx)>#f|s zINPJN@IO_(-9ZCc%XjiX7z1VcgHYSx2F8r5iu-POqN_L~m(>$QW#vwkRc1RkAc&bg z01ibAj&Av#v^DAH)(x0hu}<$)IZ@V6`!_<+O>zYoB)j%Ba??<_>#Qsjr7i2_nO3)5 zB4GG1^cI8>2h(Y9VvY05ci*kSXtwWb3uOu(8S0FC5JL1f=BXe#4u8-vXg`?rC5ts( zi_y%IVAYooGLn)m@+xR0x5r(a>H(V`ir3Uv13L&Hbg#xS{7I-Gga~8@y ztn-^IiE!1gD3#z71l>KxhGY*6#k&ZytCdQs?0vF2)S)>Z&=viIL&w9&`(_R{%^xhu zx#HkRMSL`5O`jzrJ5DF{dWu6TUYHPKW(|NV{VELqc%*7b)j+MDF(yU`ZCw|B#ec2z zB-fHWO}Hnk0J3HST^exW7rZR`tv7O$enQL2FqQTZRE-T*#Yc(Kd;PXfXTAl=$2Qq7jA{hQ$dJ2?tf+XbSHCca$!%sPWj&G{K7%z z{9hNI!<=@xFdnx6B4|se8PDq|gHv*Y;&sxaRW zc~HcB=no(3jna2kzvUH7uKr_#1ZNKGy1Qi7IM1ik6eD!)NI!p7y3ACf_PYk)1mwUk zI}vuW3cnP09#eTpFnQMyzTLdcEpb)7FiPl%no5#?;GIx@LnhZ-XK zyrB;q0dR@Lo-viXl^OhSIQfu39NORk3-WslR`*nc*bzD3;gSdfq zL1dK{c~@p|Nrj7AX%!!>gv03kS&q$0KQpcw>ycjdJ{uMkOrZix1-qYmZ^DpH!14Vr zm2J9gPNZvstr27rM^1RHOfa03avAo)ubz%@B0s*odiF(xA7WI-$3aNplESzWFLKM{ zado*Ik_3OOe^DpC37-q)4vU7UU+oZ3>%hlru*EYdlS>Z43kt%V6c}Tq@rMrP5 z#b{Hu2X@_nc0x$=kML~jc*8~@Yh&;oQl=O1Wt8*ZZ^dwl2^JXz5Ma%vFv!o#0)@e> z1Ro=rr=r&9yCU<8~|5MEcsNz6_vjBh(KlB9teDy4Ej!N(}mbgiWnNa=5&s;s2wTVYT}yn~IXAd&>^?}I^C|EPmW z{d?;)kZ+YFLq@(DGy=rnoYth7>DS9+`>-A!=vi?j#R{ErpI>l}tdl2^Da^AbYd#hSvc9RY7=SQ)0V5+<2qCuX6!4d?UC;*Q zvK)1P;6Xr{jg!CG{b!$W+bSDkRRk5gz+7S9g;aVEewM6$A9{8O$7UaprE?z{~bdQF*8V{T- zasFHS3JwT!PmJ*gd@Ss`ZNCBW;V(E8Ga{RAAI-;WeFsdIUge=O<4nQl84jp)l&uVd z9LGnu3;MfZ9z#1T$D#rs2q2a(>cJxDc|LbRdE#HS&5B zdK^S7EH#)B5b~zuXB;gR(E9T~*013Fp=ZjVr7&oT6px-HX5zzUV}^iEbqj%m$#qcV zL=0)BWUjG7?RQ%4Oqp&m3NwPb4O}D6ycH}^`xBlpVY5Y2_{o2j^CA{bg`UMH!l0vH z*fxceZe0zdO5WhGl-w1T7)JOr;x}G0< zl>QoXye+JT4G4ar=BMu8lNQ(8NVmFRLMt7QL;s)Phe-TbOx1YkN`>_f1|cdLG>G)CPWG3a1VU)11AjRLE(?zA z1aZL__dJXqe*%C{W?r(lAp+o{FczMEM>Xf>?l@NQK+r|Ja1{O+=w*>J<+-s*o4{(> z&Zm%3!#lmF*w8-s)&OQj-^;jJZ=RjDR{fk_3L9GKVr9nwnW0z7{^Fv`A0Ruh`BD3? zrR7^kF?%!N ztd)83Xz7Jr_7NCV{W^;^E)zwx4VDIfc|(cp$9u7cD4>R{$*CB>!e>Eo=u289g1pxQ ze#8*H0WU1Iz+?`mlLrAOgO>(9@*$V22UrBMQ-;DNfr``QM8-=?UDOqXeduy{xpgl> z*NeDS4MM(D$H(lzzzmYGDIUS(m%3%Dc#FPeIXCn7o`KfGL}1a37C>^fim@ECzns#i z%iezexkc>$AI;u>jQCW~VbIGJ$WI2pz|@FL5L`f-q&tSw5&qw5Bu0mOv79J9cn{}F ziLs!F7s(JI-DacPJY16Y+CDbX_$WsZXKHPm=QxxiyhZO^*-XB>30X51_k6wujDq^E zfrsU-Veu6Oav36W)Igcd5dh*maNZo{V{Bs9&p3Y<+gF=b-hLuF59L=USNCIbu<#Xt zkQ2E;X~j=Mlh;T92=o!Ew?~)8Q0&ptZ+|_yENk-$*0}u))Pi6OH>6bg;5($+Zidor zM^LJjES9EgW+9DHG+E!8)aGs{@;?5W#sX)Q_=mv)OoB1gS4s`JYaDd7VAm zmP$1|8DR5Bt2V7PD`dwTwo9v8?sXdV1HiC>@Vdtcytg_lw8S%<{H_>t^vP8N6h^boweM5fAb7N z$QmZH&M26gF!PWr3}dHEzj`>yy-p1Ajsx*|xk6y8cY=-F6#=>z5uRe8WFvarFrlO$ zLY9GUZv*6of!EEzAwF*pAn3GA+}F5n_*z#Zik1K%Hhqza0(1Mn5(+r4j_RbqHG1cm zzND_1fslk=srm~}lxT+mJ;`eTnTs=W146tnHc-+q_5BS*9U2}uQy4XQ4k>;09Q`F4 z8{c#d0@(+5zs#{gN=%(VvK5hL*JWmV6_OB!TTGe$Jr4Eq5-4H$g}TI9soL7vmbqh`A#ZuK=2!JBnO zl-?ZJ`u*8ve-lbb>DAV~l*~%jQtJ63DoUx6#bpFZl}wTcX$JlqsOudBIhWwg-7nUh z`fG&YL+N&DhT9t*9pBv7GWH}dn#NKf>b!T=BKQ3@1R*$>l)9v_s14ZyWVBkxdKoND zSMN!9hLFi018i?79ZUitn>h7hOU?9JU3KPN0Nip}IqI7A50|5wAvg4N(`nRf>ZEI}Bb<1pxr8%kIE-j2oL zv5wLO^%f_L`d#uNcQG*>(h>Q5BuM>A9!=Zt;L~HBPcb{HB238G`}mj~Y8RFiL*n1f z-*u9D`NuPRuCar{OnI)_bOI<7T+`%&3uHcp0tt8JM@ESniD^QW1FGJndDXosQwYhn zpI*h~iW{SJEN(E%tjm`-Dcfs_-EgnB{?YtjI7BSM)^!e3Wed1*&+KM8eje6JG<}d2dhWXG4$w4w$BEp@Uh2=8%p$dx8(t237vcLWD#)7fFqmDk_e303CtOj} z=i!Jq#A*5QTmrhea{@uaAI{qCg^f>LVEAB_t z(bQ+#-<+T6550 zn6N=5LHwf-p+(02&suLKTxEc!sGEJPjp~ch#3yX=8+ekr9|)UE5Rax?lAoRbhy^*T zoC(}v3ouG(LmgO8&!+)&r1kbi(QiT@aOmIrKf;1=pGn>j2ZvXqL%Nk&Gud^>oQfN#LJJ^>W&@N7& zvfhwU}cFv4bPXiqhs6gfP#3 zvBo*J9ngbz1~Y2_hkV4CzZl{i305SfQUO*ZyMTU92SPi#P+!YKEMu{rtVwI=U!E0>W33m%CF$C5BVw}n^;sX{$Z5L9jc%ce_!-m!& z>cMx~ls-e#&6FKaXk*U=0U9M{=9qR@~SN1Ze#0fJ>ual-ew7^sN)= zFQQn`$M@Z)8$n|p^PM=eqZrCvt_>S9%d*w2{>so+Avgk$pVa40sbkr53^d@0#e< z0TZ@@?Sl<{DqvCR^*E22EDbOMTHLF>%yh2-0J1t5N+8KepR3gCBJE@>`!9FnC zJNb%rC+YB*$83?H0GfM+GoB8^17|h-z*pBLSBJIx1w8j4pRaukuAO`9BbTvuQeUl3 z$8E=qL?D{bJsON zc5KhIg{#v+z;P0)en2X0mCC}T-o=G_pa)2_f}ckU<@30FXd2sD2bVEP;Cu%f4%{kE z!9r4O>7@9En9t*OI*^14oBh$gUo$3fzzQs~KJD z53U6}h6nj`F}7EKTMTe}miYGT*pPvS;_>SgS__kSpeI6wW%?fO_e|w*znDxJCB)d( z{e>$nl?KT?uG%I-aNN-~E>PXUMsQv%Y3N$%q?knZVfiU0Usl*8!7$=2Ydp6#mTI{H zz_C^A^*i}f{x!-KN=tm(=&?bDgVaY{ctUA?*}&xh4B5g`Y*p`#4k{%>&y2g-UusW( zBn9orbJu-erYx|5?ieA#d#)#kQtT*`g4D5g9R$ z6>;kZ48|(qG%#}cN=!~ZSP+Ni=r_!U@q<8mrT=3S6+resi@e4#DUDM zoq1Q06N{{5^mj_!oK>Z=5+9)fTtmU}HzIwBGLQ-SqT^EcdOjM^G-Zd!&1oZ#-5Ne4 zhV9o91O>Y1;>EI4M4tgD;#wwPDcjNVB7X$#z(CL4T4bGu&>$C9yT?-}%I!l06E16j z84_xOb=2zC4>N9;TMwTsdEB2VICXd{GM)d&9$yuAc;%=o^fF(djL|?Dro{BursM=4 zO>2WcaO#$SYu5avsq(LZ)nwsYD{(4b2jiE`(Vr%r*Q%wL5nKzYr0NE$KWuDPqzYI{5jX;tAR@Bs z9||auW0b8ivBq&-D$l0jQ~k~TH3B2B!nAhj^&L8mN8j_F>-hM}m_k1k$v&O?TjFHV zRh#@vyakm`!e;9ov*LD~q$4NGKb1%AhxbY+lue-e*`$9Hyl}|=LWFA(>0w7d@M3lp zhLn`@{%VcV#WShtdWx{z!bv$a8;baBpc&aOJ@KM8DPM)hrRndZenCUZ6Z!W)#VsJ_ zl|oIqZ?Eo9%@46fJg8?(&3GuhWYLRL{7_*Y5VnWV768r{tN0ZjvMxEY}(O6<`q-T!!zzs{ax-q#kH&&$9yT^r0N7^<+zSdm8dv&dQ32LX1 zJFgY~j^B8=xAjT!t+!~nyU%*pIs#|9yWqmVZZ5sMBi#jRU<`{SRy*EDZX66)nUoNZ zZ5NoY(FV%RCOA6?HWA89W^TDL7otN#ATxI9c>Cx;48-UK47arFJ5zjgUefo>Q-_*2 zJ(JlRB7;_L=>aYU>mRch^f&FQ3>A283^7#&hjgnJo{==9mWklhtL?V_sU<+hUhChb z-t6$5RgsgyeAShPbLxLfs`W!3mmIQc&ql&Z_%&>Py9Bae5?mTYd7lg~9QFTlq7)-& z9 zVaPYD9V*~rWeK?z4OEu!G*=eyg*g^7xz8ZAMLH-L62mCJBTF7uh(M9T4ZWfw`R;6te&1f&!*N`@s< zfGUNzO61$2Yr+`2&+M0!8LX(MUScAVUj@RMBkwF>13ZTae%^t58IEyTUY+anS8ER8 zoXB^+d!eufXZ63lm?wbNx;J>Jxl@7GhNZ5}^qQcZ?|+wWhOEyV7@UEJAJOk3K{uA+ zkQ(k>oz02A5bKMz@89B@tKZ*~ISjy24+{HMAAzV(j2oB~AQT8-vbMoRECsDaK3Qm;kBZm)LTZ5pHH&2Vz z!EU+5Qa-WHAh_9kl}dhB*2lX}cbND7AJceim~|GwvG^E?@#Up*T+!qm(T*llZS+;+ z@64?n$zvQu#}5tN7KxMA)49!G-@=dN_HNPn`u>S}uGpkel(6+LkYVoQtq6?~!1r#b z6i=E-O4Hno>y{U9glqk9Q4re=yCH8si*t}C8KnG5A?=}wg~ufx2cKruqhLey#kW$w zMf71=Rge$*boZpC?2Kd97>~AK*EB_*PS9d!HykT$?PInddRdS5Cjs8gp4>>y|Hsi_Dbx}DUnn>?hQX%E+! z{%t^~^N-b(-)CDF38?*LWX2%~wk zOYls!EDu}vDAVEsyW+?{xYjE0wp7=pZbh>|fYLOr<@ph|9=&5v4aayzmmYunq14dO z{nHgd6Co&>_(-qDV@)ley$M@sp53{1@V-9CvgIOuyPVEqPbwDGB;s587@Q`NfqZUP z7!F3w%Fnkd`)Z}H=F3>}yO_2mnrN8|A;tGI#ewS{EVlWG4AqK&4}jSo!RK);I3-Bk zP$jd>S2)f}wEq^re`)=Qq~+-$KML_1=;1On4WRupl04*3SV!7I00s8HTmX0UcZVSO z1F3(j<)bBNc(e4*&q)0IQli-tJ|KN}hFDZMDq{DZEGlCmlUFX#T?(`nXgmVqH;SmW zTH6ppas~eDYOihYf3nd(sC8+VfWn8)6oaom3q7_;iYCq94&37zJwl0Q;&a{1Kllg; zG|aVmHr9Ht`JIJZ-f=ERMCz$@V6xxnw9qmxhgb`b0U5WuClZhcifqw;<`8$6;cPZ9J7$_N&G~nKB0Zs(YEL{h!GX1L_|J?hSZ_kITK^(;TrKBrS3uv{lL32I`6+$fx~e z^SDLGLbX3GcZL%^Xj8IA47&R(68+76VA~}2q%)SZGm30+gS1%l)3eFDXF7*Qxrc#F z&q`ERf4sYhZo14;G^+Byl$N;6c~Dffvf*=B?la}p(zMMe+l|Pi0;b}qzz(IW<$Ag+2FewWe&tKQDauR-%jkDs7kaoQPxI-S(X%F#XBd`4uG`VFor*f2=&VY6dhK_x6!3SirjH-Tik}+Yj4L^VbdbOR7w(BGQ$~IAl)Btq>_4 zx&c=1#F0CufG~BVm>+u1A?ntHja}Bk3HFL@%-xByZ%AR2OrG(cv1;&Ti=wd>{_vU3 z?Ac%4GIR7<=C<~$b8b66$?HH3n1L@wp;!)SB0hKHS-VW5YW-}s2ZmECL!oIon!GIG z{;i4s?&+aY9TH|B?!^gHs70+|8|Rae?hnjI?Y z7v2S&M5q|g2~^#~$8^VzOj1%P%*%&O2$^;#6ZpSYO zqn2LbFy_@s1w?T5EY+Jy=j?sfP~?XX4~cKMQ*G8GM4621|Ezha#Fm9pDkWhJ_ay^r zfDhg9CG!YA+$T^;@nA~*Ou%nnI=?MojVpW;hYQKhaTu5m`Xg6(5$&{%Lpnd}7*bY3 z=R0eg%Y3i!D71BbD}%0v6&qpKp2W`uf=OEc=0lkaIsuyeq4ib_*CK{M#+{p!k%x>v z{snqqFXS*S{KGsL9adS-1%hH)G{J)s2*!wN3pI(K9J`nhj>sKs5Q#YMv#jV1a=7Y` zRe`=D@zGpk%TxS(CiTM`!3(ckMRL>=&YmA#EDwM4jQl9v{@~lin*oiO>$pMQlx_Rc z@1;zGkogShzS*OOpBcn|yUr!w**+84aJ2+MQTc8`o79J!LrlHJVmkH1i$X9zBLMt3g2%x?{K^{F(66#k z^Cumo3;Z#>=f93yNykcGgzEq!y^|WTo{Hu^ir7-%7d_S~6R>X;D9$w;w1_t+y@LzkT(PSD@}>AT*X~1#b}5 z2bX?yl6sL{i;LZ>yjpU88duB3_hvZp2;V=Z-_~>K;iEF_MrI|iogAH%OgwiJI#g0n zpKkSyu^47%@n?{sfzo>kd8b^C6T-!*I`e7uaTkeY;7OICVi;m4ygUu}8}!$n)el=V zJsYEdc;8|_kdY%uor|t?`$_t-2&;fJ{_|r(!H@mKol!fctPY(e%r}069_*iYzaVhG zGj!IpHNe(1>FoXmLh?v4f>_#EvW~36*EMTdY(#3*V?;;=8Crk!cgvWAJ!RojRRYf_ z1x1QCVY`&{YXPhTJ4V|WSXYn4VKbO~}>v`WZq$Iet&QemSD z*b#CaPh^nAniVWsOtt4lImt0R{?qRTUe49D6WZ{Ap^#v9{CK3A$7XJr58m|rewmbE z(GgwJ=oQ=Z_WVt3o8Pay9fp?QynFmhU!P%UYBy)oR;wpF&NY*8=WIc(lc> zZ8qd^UP85X7v(rIbWOd7Dfd{@9i39%)0BgtGECz?9Qp?Xs{Wo054D)YSG1c*b0ZI4 zI{oLW%2(fP+ZMiVt#W)jYQSG%Ddz{5iUap)>|X=hrq9%)uvIa`CC8g{*0xQx8xwrp zLH&){Gs}KM!|=@yydd-w;B9NY0lN!(m(<}vANoE`*QvH8axN>&Rok+NM3Rborqjt! z5kx8d5RH49_g@--f4IUhEJLPUmz?C0sBfG zU!VY7UXi2p}ft>>UF1B6eMNQnA0>Ju&6kY%Iu-pJvLq zdhmVOqp$3|&Pi3})n5X1_n*hs#F3;Jx~nS!OrFiG11#=s0HvI^SDDCkme=tC$*Wq0 z{8X8t%q^YwpshriWstViO}c8jVO`=dD25mxty)&Q+o-lv@hgr|2oCmZ zrEK)Z`n&+M0(4k#8a;)Nq3_2glE^XZEHRjAczH0`RV_U3QNWcVh?@qGVmhc}L!l%R z#To~~`}w;&*SDHJ+BNPga5b$k+W4$NW={d{umd%Otbv#e^c6bZl0#{IF2T-SFucqf z8;HV2^xkfYg8uL@%Rup%(`a^brT0Yf<_s$K^AF*qR$IRc6d%zASYdm;m@Q$J^7w|- zR{n5o0iWtb$P-s~Lw*3`E<`khFBnB2?3W&1i8>k`FWb0@ZCnMwmA?o*o$_cG0G^O* z1dS_ygrNZxcyv?YF62ezQmtn?anOMWHcP^KU7d;M?~%!;H9`=_Kz`5;v(>Llz)ZVn?JeX#5s=^x8ZO`)WL zZYq9oxkhKOK^%OTOr5Do^Y{{1U>I3GLm0q{D!7HJaPZ8tQQ z#7b)T(RFp>8f4+wdw~?b+cF8ts>p@w{FPY`>dGkS6nHo#mfiwqiUJzPKi}5yP6tozC5aAX(}4OHk>vkz_3Q^r$ApOP-n>92PJVY_@>0a}yX zEXUppm+x`knb``_&qazy8;`4pyzvSy{M{xKIE{cAY+nU*eer3Oq_zFyqF`?b+$B-F z7KSpkfg;o0VX7UXNnS*QB_w~Ad(n6`P#;cK3S74-M!>{+@X7^f4X4P5xng#^ZZt%c z@yun3M>EKw^jDzMNr!jF%fX3lGrPf4mv<+gZ%S;P!58(^mpszTO2kToEGvQrYnI|) zmgONc=s}Z!)%ljSBgOMKQ&*1DLH>(rv&(yxn+{@#R$lQ^57K@@HYQt$~`6HknW)<3-HQATnoYou^*?fAgvS= z^lXmXnO-_sU6ld9RV&+(;)M@c@zl*T#L3V6l^!0et*HX@(h?xBZ=qU;ZcB$nHaHY4 z;)`E&+R`?T_P<&__K;Z{SyuQ)QbUAO&qzpwu8~X>dp@-oxL&zIwKyM>JJR3-jU7eT ztisgV4vd9*FG@Zw(6Lw_C)!K7ay7H_qXQZ}j91R)*t|rLYeIq!#0}JTVm}huAossK z=y~4Zz~#fTqvh8xZo%X*A!t z=2_X)X8rTPgN(O96XE%)3^bGq&6R(p0{ePxXP1i(%||*(nV7So>ol99q0Rc230C>1 zPE1vi?tk?RmQsWkww@09`6m&-lYY|q!P>k@M*eQI05S-~ib@V>4g0&Q7ITBN4~F;x zBzvQFqHj0!y%Cuna0q@!|3YM}$HKO0=!re_45$abf4x`xeeM?`O{XUl$nW*(tN~BN zJ&#kxSU;BuF@9hTa25aB_(ELKSPC zPI4;tWAu392UmxD&Iz%ie&+Y>*`axmE(3nCe7VQ_!;KMSOum0rokfocj(P&9=S0~9 z*i(SYy4d2V%mu^{2dB<7t}tsEx)jB%uz#sLlewPIbPygVfb>74*@?mD2pTx@A3wUg zQiA1JKm1#u;V9F?aAu&xBN?XJt%dvc)xm|}O_u6cT5Y3#XE6s3f1H68>>;VfZRC{Xh6eO2*`1`7*)Fy)r$72cLw@Uk+;v==)Ey>V!~WsE zgdiK8mL;0;F11g8n^?-pM8c@X?d~s;-Macwluh*t#3{dh_$r`c3cl zXReTam^Kni;+tfr%6wI;cC2PdyZyAZ{uBEd%bmSirh|OEJ*IJZrsJ4AI*#s8cjyRb zELz7@zMHvUi(KwK7n2ka92Kx5hFxcA`Wnu}4Eg6LN9*fJ>wnnt7=Tdstc;8&a7x-= zF6__c|EF1do>4D|vIek=@y*B%2{aJ1@l6wUP-OsCwH7chjY4kOht(D1bZQ5dFL3Ry z`KmkUq0+%eWPfGT77irOCAX+@M4laUqK>1*UU?5l;$(Oz#rgn~%jWwySK%puguU8n zdxdv3R90hFG~Y(_{>U0HWmZ)|WG(G$t^r5d;c~$;c&NTX(@mX1qq1GGE{+5vgnUOI z{Qj_^7K8-MP+l^1tG8-Jce+LdVD}p@4PVNa(#}(*olfHA<(?!ARn~VH(lzM){p0@v z_j80U?ai@}cfG58x%zAL>*CbYH+-r^rcJAlB`~a~jvQ}2*ZR{h_PxbcM_(B5<;z*W zB|THndwaDUtNZJqYxQltFl{bB1My9TM<+!I&m8M}O96OrAlGwka+l-wcbXRoG>F+) zmD}9esV%Ns5d1M9%$ua(e)ra}%_}_`dEe5DUmGk|LPSj!R_FKBjM(xHN#B%x^N1 zb2(&AyE^Fj2OrD+n0e6Mm>Vnlo%Z$WovFLMI0B3skMs4>+O@hZ zf&X-a6JBFq=dr7<7)^cibog07-o)NjqBHoIh^ynF8LNXyBGc-DDL0ZfX2jfJZJo^F zitt_E*|Wt-K|*@KZ_@v*AzyneyLIcW=L7cuLch~c9+!(S0s>6<9~eDfn$PU=s{&%C z&}n>hcbM1!A1j+d{ld)ckY9gS8&2gtbZ&XQsvF(COQy@}`&S(jAVq7IS|mSFbO&m; znrWWuWkrqi@PP7tT&jbz{Esvxo-YJW4oC@?0_A`~8SlI^X*Wm8q1y!aX#v`t@U{$>oTKh%hN~a|}I% zU8fkWtK*9etIt*qR=S^BV$QIiE|H>7=`8BMzQ3cybQ)ILbj%fGp;CIuP_^^(ty*VT z5Pea}p}xElEfIEKCu;X)yD8@6Me%LEgH4SZN!EC|C5auggu@B2|h8s@xw{iEC; z%nDuTf?zJIu2-QSB1q5#x|Ja4zCM&sEdt>%`JWmlbNY;{ zAG1%lx8Hv9a}pW{u>C>5 z1Z+Wn^0AOVf!8W$F2P6D(`sf{+vx{WdI@B|T;dQ8@76k`XR)8>^HLjOLgw%091wuC z)#!?bWoQ>eQpa53e@EwE#J{|wxUL4x>Jh~JLq2OJ*EZwhuf(jt`bcXuySS*B21D__ z6T0%fmEzie&fs=>UrKLxMp=~x!09t{q-FbJhMg_9u)Mo`Za}8*nFPYvJ9%ur2%SI1 zP5jC@0h!Ss+=v(|JamX(+mkDU=fr4RMyQxJa? z0$X{=7$D-9mb||V^C9g@D}UjMIOc<*yqp05v&sa#GxQoL#UZ1I>_mPzGen=&$eKNe z*>&s;6UUSV@O4J-HMBvvAoNj^s(;_;$G!WFiV$S*u6w7kd(*>6qihl+HI+SmvTzA0 zDGzc!F*m~v57!+h>sBC;q>zIVtKm*CKK>Br9o3-}P!@+TXe7jWb}xq=v9!R}jpfd`m1u#uf{j~c8XVU&1uR&27~t@)I-5-MLL z#9IEs=qDdls}KYchiKMIgS6A%;Ni(ruFE|=W^f*&K%{Lqj_Lpbz8C_77C-weF=`Q^ zMNiT--+E_*I2~kn=UcLA^cgO6QavjcDawzQL9UJbdXN>2OXw5lArD(@zf_jhJDG}8 zBDE&JMzj(Egk_G0D~L+m#35cH2d>bOClv(9X~2b|9nxwhjSpEcN2CDsQh`Sq_E)C# z6BTvZSV~3){kJ`>95xWt>T)gHdy`cGi)oUUXHg=f9*uY`&ZQbb`-KNrhb(Pvx`Ag& zEo%cT{0Bvm<)u6jd4TI*1HWVHrVDSaTf-GAeX8C(^rF~M(91=1o?OrG$yc5`Ip7X* zKN`fPl61WamIaQ%+RfRgrF}uRp7NPN7xOJBeelxAq4#a_&f9XQX`_dceP`vzdx@r< z1AI_~+R~5;o$5%?Y=#Y1O4zb1soY-v0s05lT2IDv6SBh1=a2HYWHsn<2$g`}11J?z z%%*Gk%M$%HU&CN-+!}R&PvLGq39WgVO3$!XHIiTd4s=ot{Bu*+XtltPQ2eP^IwB!FP}UV2nQvCF2#grupI&OKL9*3gwSJTlp5B`Zno$1sj|d z!HU@ffcN|@*l9Z_*<1JbvJMCebe)2m%(dCL9rU^6ch}76%L70#SA~`fRtvVP*G-YW zO@FlfYPGkF@!kWdQls;v9D<(he{zuf7PF%251i1M-59lt>F4Jyr(x);cYfm^E)7v3 zE{Fq0eHV^CJM_H4qu|yqa1M6uVnl3%5Qd)j6!9;`Ga`$_`5rW zX3H)=8j~Z}KjHR~%fTR}Crw-#j?(yfP)~Yw=b>{cO|``~7SpYBQ<<5j+-#_#s0*9p_D zLMjw-_~$Lsi(YQTM!H?@l?NAw4zQF8OVumne-y;aJ4uWQzF<3O^$%9R z&^+Vo#Sf3obT=n2=qzcXR_aRA_oLTyZ08zZGXW>Iea5~>ff^_@m_JAe5{h3uU#|Fm z=6eA`FW#F%q`V11!7PshH{CafXU#$O=J=5pf=;ovYg^VqM|CWIOV+389DUGcS(!S~ zHgoC+!b|I2U#E5|SjvnQk0#gitF@jtFL|sTNN$d@S&rAFw?hx~5WMj({u9zu+yo!< z`;Y!8JO-0d!^;~73^&=nhjijhgj?2Pi;DK9&Y%9doVF4av;`0#z$0eVc1WFZtVHXh2wYp;1Mgw zX9r!d+))>a4EwqldLixhe!d@@u7OO`6=rnw|NLAgkOL|kSi8pk$zbjTt}xu z(!gv@&shJMZfjJ%*HFO+#B61^Tg)yx&#cOVmaD7`C*I5YktZwA?E{zJs7H~7mth`s zzljU7acCDaJ~Lnmmbg9*9DXoS?9JvcV@xum`iWTuJn2ab(LlaBzFk9fq}_doSg8JJ z-^{~HVqfy|eAs#Ehh*yR@~1NrwEpw9rL&^*+K}XacXRl zn^U<4vIlrwPTH*|E8bK7g(^g25ws7Q-P#2Im8~`AUb{dpEHhPa6a(+yV%rCis1dBv zyq`oS$T?H^(`D=RK)$B6l2naT>Wcf0l%)iep zo!&En3~j&AP5JbCy;Tp_$-4PYT6Z1VGyKiO9dZxwp4GfR4KW>m_O98=a>bfmJ}L?# zB-&B>fZdw|P0^`ryV;3uW!H@|C-#3I%7A_6oJ=vY*w|7pB=H&9C&IyQtJrMcPJP9# z(?mZMfC(%*U3w^_$D--Vx6y--5w_^m^IdY|4meyXFa`Ypj+ua{WPw|B*eC9AVuRB+ zTlD+Kdpraf@T_@=G`3YpiENrHY{0?m@^zd4H$Q$?JwpVXm-yRuO|lf7g1?>c0AMo2 z_=fu*;^Gach3>+#ESp(3pLVy;U%#*n|7Z6Us;qyqUL9^5b}Q`NSKlTYW0pSs-%9MD zle<3mG!Lau$^;9_^ILf6f!nK(OQbV0_N12kx${M>nwN*KDAUN&+;{<2sa8s68IDfz zEV`>v_1oj@Lv;fMvB9p|l@PVI-OiHo6RXoc)vG;0Ttw03@ap==)wt61ASZV~x?Nv5 z5nWDzP1qVyf!|nrCh^N#6EQg@A1^;sEkD+j=ueZ+3u}_T|9D12SReTBOgv;BK5<^i z$UM65YX52@gGmzpe=Y#GMQo*|_vXTy{PXkB8-Z&(zw9@k-yy&VcW7mQkxS)^KAQL7 z2R0LLK#!oXU{`VV>d1UR88gZ^d3NM7;qZe7mDDP9)O>!UK0o+J!g~hhVi4_mSKl$Y zmYT;icgDW&d%9EO)?{x~-@8Wmw;CK(d$f4;as$QE&YR6U<*hxOTs~nS)o<;e{EZ|d z_q?AS1}I&zftogoGkGFR*c;Ng?aT%(;LQsER!pR|phs?Bak+cXk$JEc?qwv=DgsHIn3QZq_AeBVf&0hE~sCYXn!F zQwDk2Yt`nw5-bIymcXoC$D^N#FTZTwT;lc(B@5GY5yLvN5mS)V_8c8OqENEoU>-~Z zPupN-;&Etx*d+ETF>)yNbLD*K`ZCH?6wZ4a1b;!me=MTp$oy$f#;tZW z4|h58vU=|IgjXP;zYvh8;&9%cuR(pu6bd#Zn0fp5ES;&%6`YWFrJ z?ucP2prF47Ga4p%$jNJ^a*L$>*McfO5nd6LxxR`lLoH#+QU>t>B&`ob#T3rc*aQrg}Y zAAWtg&*jV&n5@bVQyt*ahk?CirD@^zd1@S4SVSxg5+R0)FFuO;^!OEkOzH)#xASEA zYObd6QU#&?(aT+#1G0oIn+`o31u=f5W0Zg+4-?~499zw^&0AZDx4qpmwqhTj(_1G( zWmnrxzi3PQU(z-QfGrADl8_o9EPic!<={j)p;gcQh%G?j#^VxD&;KGAk57H67LL-T z*PSSoN^o)0d4U=LsS|sY@Ubh?+bwyuHLYWDweln$FQMVE@Zy=P8=*XFoemB-V5u6L>Q=i(2-Oi>IM0R?hq8b_Yslo zeII6sk7*>A!}~m519=bGgqMNmc8pxQXRxRD5}UHv)S3s^J;XXZe0HU!Y)`%5rPF-% zm>VeFOpxy#gwi$9i1@4HPP+0@M(4vXR7)OsuLxcNwDShS_KAI-gwe?B#LY=(vc{<| zJp=!H17z$$&;rQBT?Vo7K)G+ZQ@LJUATa*u-y->b$7FB@Er&9nvwwJN(}Jyuatpz2}~L&W^R$UI)Sn5N5~+-g&bb;CjogBRyQRBZkcq zsnlVh@#g-ghq^`yVF74jw^T$Qq5_0Hbq=HO^M67-E-VSDFk8 zPcfhaKT+}h8OFWngpp9nHTu+>2Q&8uZd75D?cNa$I@_5WP0@v>1tEN@>QHsa9ZOBo zw~hazInKwkHmx7 zKS9b!9;4yw?em$plID&9oC%)_o<`vAHE&q%j4y}pe7Zv-@@DP*9?7#0CJPs%&h5oA z4{9%*cZl!S^Fi)}n)f97qgI=tli(NxRvqA+N%D%lSB z%q;6wgrba+5K1_q*L)s0?9wu}Y!2EnMelEUhVZSsPIp6Ac@BqmY2`maBD=+lHx`a% ziqu{cg0=yxmQSz3KTDOgaxe4nG~GTB{(+3qJ*eFSx%3mcYUdPWLYGk+Th$pJhhN%t z)Ww}HSq_Hy#Y}8oJ!<7nty$!xxsH>@O@a3LAHb=Z@_!bs2vsz0qCgkq%M#A_Mk~c4 zwkwS?yvv--@w+?p3*=)SuAGd>i@v)I*)eQ5Tg;90Ie%?#2>o6f`zdHUIbsiALhh*Wzgbq7fPUJ3QktJ1$^lOdan*#a4s4d*;7`_7 z?8pcZpGz3UbK>d&@b$Ue~J6!qhn&z-_pTI%d^I?v68)Ik? zLf^4L?%@lELD?;8rel~*U&iLyLMiB&-FlY5c>IUS8g1+=7?H<~ebcukOMkH9!|QIu zk>+QU{qXYbva1Mfz%44l4f%Sw1-ijiF%98oAN{5q&<R&)u|?0Q{5n^Y|Ke~;6Bzg?dr zMVdT=5P5FMXpHZ}C?Wy!9po|8j4r~P0w@-CN=VM%F z>1EZAYQ{N?IqO&@Ai?`Np2ktJVuymP-ygc{7QR)r$ASL`9$+=%B=ld%!OioZlVSAl zj!rEdWZyx83r|yQx@T72|3FTPC1+N2fSn%07TdSA2KaDBjiBp~Hkga1EPHBGkeV>i zoDhwq>d($MwtEY0YQ*Y@oGQ1iRpWmg`}GJ1+OyEz9qH@HjQ8wTKt*e#bV6+!$>=(GwE`k)rItKqjDvJ4hBA5)IzbcVw+E3b`Nzc(-MoJ-h7w!?nA|VVgF4)ZNQ(Q79d_30I{cKg88M?oN`pkhAjYC9G zg)EM3m2TR&tNTb*^~amtm#txHfoKf99$xrQA1L#+{qHgrLx`x!@$TrRyArov4v~Dt zzOD6V^hzU6+6YKa`&Wf&=~i&F7y?^BTxgVZ)0+5_k6;t+5lra>v1>%{C*hH_q;uW4 z??BFiT}@mb1Vbl)60Tm%C7+kt4eC7YA8? zY~=^g{`UEuC7AhJC^bL1Vw%kAv+L~V0q0v>e5r9_iR7&W=KJf0N#8a`j-(RJ;3b#i zlQPFBREw^SSTj$54f@nVqDBK5W5#9>3&EAX42r#$+!}ZS$)6*h`FXA*X1EFQ@LuG+B^VW00Gb{hFkIV<2*0Cb<3C!CPClLyI`Z1z8FMq0uTE_O8r?|>c z_;Ywe6k3MT0w`(3)_XT!hU})}KEa&~OvA4mqImq`Ce4y|B*B|Fk0*!27UawGWJyLrfDhi%%^v8%#1kb{hi;QPpM%v+>|l!kHtUw^6fzB zh4R@)rrPUxUdS9zGGirQ|GwTO78C(GXXlv4BF;}3!0(vjRiC_qI#=;W^l)S@aAc)F zQ6kROVLNttV7lCB`cOwYM1W-+#>kOQP*ID<*-JvB-kJNt8`FrF#SpvS`xj+jBtbe& z{7g+xEVPAAg)4qJhS2tn?=-s(O6bGABv({Woxil3E|*-QYko7v4s;W^`Xjfp6|$%E zbzXW*XAVTtp$hgG4Xt9ODF)Oma)aFb8tcv$!iYHzWCY<2@e$nH5M> zI+&&DmsJ<3z=fhagSZEcat#f8k4EeUS+g^;J=<7B!I{Y%N2}sstPMy8LAN zhd7NgGl6I|pE(LUO%&f(I%-h+r&% zP^K9t9pbd8r6pZN(68abxz*mmb3Hmfcfx=0>n$kw&tHz{5QtH|$MGdZFM*6B=uHXW zD*x%Z+LVOYwd-#H<^$kr#C)%OtL|Qm#z}J{7E-~{f9GX*XVApS&W7F4vmAB3nlFtyI2vy%UW^?$}-UB3Q1kW zZ=9+qFm*^(F;yCp4C4P!H|xuC`bA}GaRpC)~?KA>I~@T1Z|4%zPEb;p#s#V z;jAO38!De9yZS`N_7I3kWi6k7W07xPWUUv(CgY@)#Ldn@S6$4qdY;xAJ#Jg%;$g`o zKxCaRAXDFW%rU9jviHt1c4|fQ;QsMIm#Y(C^7M9e5M4zF?yjYC{qyD*TKje}NVvbF zMLiqr&u<;4v(8p*&3mfw9c&I4LY4@Xd9?BGNOs!YZ5Jjm^1^@*DTx+&bWtiIZ~3Hr z9g;abx+Xr<^y>(x#Z0v>kXO-IsMPiN&8#g*7V8Sf#_;&?sY1~Ld)ia^4rG~PXuLsV zF^Of+^Mw!B7v3t&v>;oI3z?X8DgjavK&?j5A9W;Xxu>20{b<7VftuN+Q{Dn_bv`^} zz-OGF_q_c5jP$8*VHSM>x|z{SrDN<2W=yblJ~-PfuOnHY|(rf8=muE z@in9-z=YCm7!Qt-yLV}d6Bn{$?g$#c%tYS@zt^Djmzuh%1(?>=a29@CNQoNt#1n}B z-l+`*uGj1{Z-Unm*=QfIbH_f_&;@I`Eb)O>bgXkxR4><1|aM> zJVp=H0;-IDz(K0#5L5BvNuH})04PgAUMd13ao8OB5(jQ`im)G_z{B#UPusx|@Ycn- zd9TcKPKkc>`J&mnvJpR)0(Y6X1gKLvKPfBunPMm0e~3251PZbN;F2QW!7JZvFWYKw zrhYJ-DHFLiGWPh@P*6sUjt9W+*yj2{FicX7fLoE+|KZx;yWev;3c5I5IOW>vf0)s?>C+#ovh%y*IeONT8?)2Z%p~e_aCrxL^O5>u zY^pgsb~+bOH1oV!gN>mJMqHn^;kA5xx0eTs(G%Z~*-&ncCn*}$J0cc<@aPx)M)L-w z`hH*TT8tiq7aG9y2xAqhs_pmz7Tp`)w_#&?^yo$XNn(~t`mlG$n}%HkzDWK z%pbMo^SX0YT(I6;H#|cC=1}{MJvJ?tasX7JbBJZ_-!{z8VBYt}T1y1a+xASiiQ{!M zXf!4BPO2{{K3s&qm!ru$)+qof-mgRJrF2kNW9W?r;EvM*L z6;8(V>BFIi+UApwMU2Y$7hnJAmfCFUuG3yg3!$uah!1Z16D)AuzR68@i!N-^`^oqn zfF|{_GL#5(2UuUf6zADlsVwSUG(mp&Sj#nUgxk*k!vFk1LqFQ77SMU(z>Io-^@bgO zw6xLI4Oqsy?5~_xj9!O0S`f zTuWtu);E@P&xGwKuU)+A*8js!cf9hVN$^Y-JR3Y-Y9qd*2SN7LO89=MecxklRtt@Yg%VK>_SrWb)>fY)%EcmS^$)_ z^MY{g&&3lINRbgMlsg=ED|P{!gVNr8M7ladKW zYi>u|pN(l$z4SvTjcH1lqlPn^7Nx-hpk18bp@fR{`2VNQcCnvP^}W@FhXb8OcIV4m z(T%ZpC%x@Y}m^(H2U=XVO)v2K|BI>*TkA0ld+%}@KnqLx~ zKep58Fn|7blg7xzm)j3}y%Nb_Yp9LQO203%omLS1Bg;~QK6FYpd3v$gqYk1pX0Cnc}K6deuup+b<#TNB)jPM)=H}3_&xJQ{=nK6|ff6WUwQUWFC0^c^W0y1BVeir*Mn3qp@AqN{Js z@4_2XqH61NX=1y-AeP8q+X(9&zb=_sL3QwQhHQuKKikbqZQ*s_Qq*oTS?37$b2>QG zK?u6fy4@+eFWQU$oZn^FHi1pOvRWESJ;TMHjEjBLT3$CIk*443oV6046S;rLYDf7F zL*esc?e7)1A!;dHHvjL$;`M^xOZS*ng#@eGSJWcKobTIg@PtrRzOVEE*-fa7U3-i5IO1(v(tE4QG(^D$MRb%EfiJ ztBX~|5n%K(I});nR!wJRuLPKX>LC2OdErJD3`=Frga2l(TJoJm-kpX2cvHKgHrssM z;>Y5ygpk_C_|-)r`J!Y$lG+h<$b@yc^FHXX#6+y~Y|*G;R0 zqDQJHZ1(Q@T0XmS|69|~3F$S${ZPZ*c4|N4YcHp*a&g|4mA0F^pU1X4Sw-faaD2s5 ztSro`h#W!tridzbH^ToFKkcO`4 z#d)Y*VKt)ywHRTBA;~P?bgUb{-9lw^c>)Ha>#@Y=pMp*gm-rifhxPML%ECk(wq7Ft zP|fbfF#A&XM-#P)BLCDgxobo%O~2b59eU~i-eMN3X4*J^e!Ac!;kU*W^_Z2LG4_0c z^4G6v&gBQDJ6XgcRIrC66bTc${tWY~lZAn*IB+aX$*r9v9dq`~bDgyEm2)?;N3j-S zst%ujS~Na;H zD0G#P!EafnJs_VitScN)XxmIBd84U9q_2cE3j0GYIi(n)f|tZs-dvtj$Si&=THjDI zQGWRJl}7&jH#j+Mxg4ZAcYPYpLkJI%ZPtq!ncOCkBXs8T(U<@1Jh_y5J+r8-r$qRu zLTx=;OKX?rObp9iDSy6J=j5aVEo)t+d8=r`@)OHrv0emfaAo@x(u(!nTOV8*AAm>A zqUh?~!Hh<|gL2t<$M)NG3^#1+&r*CN7Io_b{MO$#{^_=vw&8$0&h3079D2-jPRRlj zLRY(CbI9QBUk)IB@M_FhNI!!t2xeyGXYTm+hNOB*$#x1N^B#%uYDUWPi_$s{{I3Eo zg_wjxzH4&I6k2)g;4V9>Xv+K2$lMiJ&2hF*CUI3-ch@F zoJ|kzAgWDu!8_tt-%T_jN-nVGtpklu@n9#_joIyfLpe z>W=Y2u&jX)4C%_a!GMRa!gkCV4ZJ3Kmi;cR;g2{x(ym@`{!vx;iR19*<^$8PTY73A zOxgaWT>Nu;j_(FNCgNoAZ273r@J@Ky(EG5NpxAGkr;KiT))AhGLB0vWF<@YkLTD ztsz<*$2el6j?bOIvpb&fi<=MPFgKv>h=S6H%?(p4HPAZy=;uV>||$DF@DeyqPofA zz@%XDdr5GM?yirjjl+8l5h9LKr6-9P4kZ*+o$S%Wf&aGiGe1s%4*!047NsM( zlyvSIWPtle7x^c#Qz}uT-g%rq(vwQSo%V80HM~cF1NyOu8->B0)rS4To>oM zhsu3sZET~d|72}DhT`)aBL}W(axdM)$%_AbxGcRxKEF>=kxSW1Y-V>5IHM-UG=Lg3 zwqf!b`_BFFII4G~O~?Mgi*)IwrTC^sFhU>#%{KkpjihRgZ^47uwjUUiD#^w=N|{I}WkJUsE3j>wd+3TCj_|i_!ROe9{Rg90jiHNq6V}-3^S(dpG1^By(*IS=B=7SYRUUN9CBup{nW{!; z9hs$%g8HVDy2oegl~_+*ptKfp5Jjw<#5x160I1qXu1Ce9 z=YENWKg+*93d*aupOA6%z*5CD^4GT7 z^9WWm@RppjJF@*=yl?!lC>@{ny9RjE$`a9jtjL8&T(5lSvYJp z+gs5rq&2^7a8i`1Ld%oL4wC<7i0GS5kc~`UHPrBO2*?^<3^qHX$Ahrc|Oksld#_6M!9WZXo48F<;Ah1$)*z0wAY(;^oB^>N^)D8qOJT|0#n zN`c2svAaY;^ZKO-vxwx{?5lqi%JMkRc6$|7vKZy@ zL@YjL;P0%h{ii%MBfYcokM>^kqszn9%^Y*c5hxTD2>y_iD_U~GZqBMhn+vgB{LTJo z%A==mVIxh5kWK~33bQFk_Ew8g@;C1b$Y1e*>od+PBIf=nnd^yN^4MnsC%G&3Eox`j z&Wm>(@wd~ms`<8&4%gh^q*R4W@+e4dHm*xb=Fqk4KW`PF&zmWG^OUaSkBldfJ%$wu z7cb_x#yE!cp|uh-+cUR^G^|p_5+c<;pWFATUE~&(ml&MvY9gs$+mwdO{{<*X1V7%ph^AA2}QOdT89sN20I;NB> z&eM28ISux@t<1sbD=><8Fjw-zjc;VSU|n<;AHSF#Z)&)r#837+6h~=hR!-4QA;*j_ zW8l}At;9Q-`WqgI1!P#eFD$R^;<@SjGirmvcJt;fgGcmwIW3uL(sQGfv)0+9Lg|LA zP5j=A>BZPLuu0eWXXcs77q<@1?g6PYUzpwJUyg%!uP}Dlzaa7to)ZHq ztG-_Oe4YL&+luP#Np#&Grmr`z4DZUs@fL6UvfHYCkmI=(W3-G1haxxqY&XJb+Et9m zWF~KJf@sE6Sr_N9A{=Yi(r?Q=2aC6%Zlc69hG)=ajzwgM(Ur9)Pcx=%Rr}5NSP|7& z`QR-DYN1JvI`VG)N6S%;*l@LPqCE<7m(=?EldQD$BUWzgV~&UBHpz`A!KHZ~q76Hl zeoveJ^`{UNjvBr^V;yu9Ii8LUq4PxKw_%B;&j=P2-=x#61YXQBJgBa)) zJ+@<5!&?e%>PEj>vq|qnd-rXgLb}T?gxX%7-GrTwnOR|3)YtLByH6KNqc&+*D@}Ld zzQZ)Tz{?Q(+f>fbBvY>nt4`vHA9c|A^v>l6e=@g+eq^F;ETa*AC+&0Ws{;AKv&>^s zDo5TO(rj~U+$9Zwu0g?dT!Z=NqYofG6JB=%{d2dEJBF@I4t=EV7tfcY z6qrS7;+VX_sX4_uonRQm$xO#-L%~ecpuhz+c4J8FyUf5rCiGcXbjZq$|K_dR z*2x68>X~)3mbdM_EpRgYlGtIQ%6_|H?2hO*eBq+>KFLfZFVf(oB}k4k_j1f4(@7k^&T5unHo}iJo4R?U5@ITq0YJni!H`ZE4is~Nti#$qo<6?@BY-ruSwLw>vuQHeuglU4b zRYd8)b$>4&@El)Hfb}tOBYsS3NASsEuNK}0Wzf~y&3&RwN*kn%`PsxpQJ+U#HSU{0 zy=MsTgIf50~0!=7*-W7XO7ztX2Nw3C^|l8;6J8YIZPnGg7KDWJ_XG zh3FR@8h2%`ydlj|Q6^hm}0XKHQ}D?sT@v_q3~Y z9>ylL7*!|ayj#lB292sQK8s@j5hjQNZq?j-^JuqKbQnJ1+gtfK7BE$li=R66D<;qL zy1%>p_~d++3)XZOhdGW&UoYclhAVU~`koM*AWIK#*6`+ucJ{I+beE%KFOLundzMJE zr`K-KzHM>WDo;sY0EcTQ^~)_J52m00d* zralE{@os}K;tXrw*NUrQlQ=>%tyBpQ{@d87ko>~uKwB|uD}br`uWCDb&eBywW}Y7Q z*=bMpX?N2Bomatn(db8J`0u_Y`mEjya%TY|iDKR@JH5vu_Rqb!t3nno)wECv3tD%z zjVT``)}vIWBfBd)UP}%B&U|A0ZkK$XLIT0J6p<8K{PrlQx06S-+qOEq%6B|SHvtBw z36qfPEv=2RAOI?$Z3~8@Y*@z$hp=L_IOQa}8*x-N9qkR_)%2oO$Re^Ka-A1`=+wvf zLb=LdMioDLl%viOd%|g8#$mORJ==QIwSOY$J(Ae_-E#=>F<>*2bn+(Gw4_2{wG+-p~vbaddcdKG&kg6 z=p30-saN;e?&Q3!Ty=9f1xu=V0V}bm@?}N@h_JVbB&dNKxcD^xb?1up zLzt8Ee<)uNC})8BDNHWPyZ+J?1CevWvq&O#M9CxG zD_3mjVlM*Gy!tp(wISjVcwPIFHrq zSS=jfOtk@mPN`^8kySBRYZQJVUBky{SQbdlxejbS$K-f|E_HomP4``M%REXnOa3AQ zOkZKAldDpEy35Jbs9%+OT+PJdj@q-YZ1=n4X#V>kD$yB$Y>r0>!7wvpKlAr2#O*bH zMup~*oBsD0FbV(eDXNyscDyk|X+$CiQE!RQ%;J6kcIE$zhjk6go>U(e;1jeFi^YFa ztE|;2wR$w}WC=#QlOi&yQvbdccQfalU@)Gy<7=MtU)*Z&f3f=*uo_veiI=j**yI>v zPxC!{J1PF1t^2My_nm;5?_lxCrnT zUA+{z6TF^Px1xkvNr74_}ReRqSp*NAUbH$YPUE{2D1 z)vAlC-fWu%<|n1sJh`YADhl@C*(PfxW^|f$NNdN26N5NH-tt*bA4b&6P?tBY>%PMS zS=UiB2ZNeRDsk{e5|MHo8q1>zjTuDmz+k7uK^)meYY9C_AbWx@pLxXnNjMVx!~5Nb zPw|gU>~#pJCx~DU4Yc!b@qlu~{Q z6b$5_L-Us1qZ=%seIu9V%-}Py(!Lr^BFTYRI!s6eGdIr^>o))G)EBxt*>b@>b%mvRF{Hvj>Xskw5a1 z)o0X8`RlphkR>W(OeoI#7fpW;iTl}U8sIqHHSm`NU-7YL?T8XkFldO zxg!I|T!o)KMAT6uO9c+37xSE7yiH$wB2u|OFcFAn-MF7*z&ZMpc1_IABN28K^z&3D z)OI`D&VBO*A6^Rm!}2T*iR#KF{(z<4iX_-B0SI>f0)&M@kP7M#+pUo~Ofe7KG#d!m z{7fOik;U_ls*`qThFBz?AMH0cHvAFPy=`bkBCQ@|70wCmiskn8ww1!iypm%gt%|^m}lav=V@ch1J8S@6+pO|Gsc9!cK{HULeuyDzO?)kUa`A=^VQH(7eSyAmsifi3R=$ z)UVY|X6*_0K=H)kWY9|Qk?bvfRl@l;zYgxJ2-V!I#tKBu@nXpD^zj3|69YTiR^kYf zO-Gvn1_y#ZLGJ_pitv*K?K2w$AT&ig{r3J zR+oM9Lc1v+_&v|S3qm{frKefU>ijXIYU~@EU@I6X(VdDOi+KRM@MJ2Y*a58j6n+}JsyZe+QD-U_T{5{6^^e%O^1(sq7|Xgbj=Jev<9SdRkQ*mh{%}3W z4mQN~hh|V>5G_tQ(AV1K^Td0w3WmZ8hVr86u||HwLym4Yt<`MDF6iAhMLa8bc2U{Y#fDi7Ty z94M8G)p0yj9Bra7D4{AM8=>A+B917wi4T^c*5JGG`bUfzWPt5Mt!Fh32z z)oW*6*XSLxat(DkZIi>LprQ;3Gq7Tr@2jjlr_erd9B6U1o97VWc=WVvu{;a>Z(!PY zxN!YhkImqf0$Wa``jU#-wp{bpb;+l&3UV7!pY@@LPfWhphFZJ&1ud0ogJ5Muh(n!L z_7DmH6T0LTc1EGFHb2)`W!&<*{e1?9-w{88CpwLfnd=~!t#0s~&zHMV|!~iD4x%T>u zhXE9Cm@b@)cL5&F(3xkxUq20cjo#y?$Bs@qtK=WN<`hkq++I2+Fju-vJXc8-A-Y5A z;3jCQ4po>t3#m!}EA!pGiR~(zESCl&)nM>ViFci~$(nD&8M%stX=U%t*^0ZAXRfM? z!Ec1df|qc*@3&6)EA~F&@@ci+rH^?j9y$mgH14HFec}LUQV!SXWx0pjgb(*voR!BV z>)$w{U(k!jR+Vv8aJEZz!@tk~D0MMFy21tFqSbB|#SbPq<#qa&DDFeQ;Pu~peIv#* z*@`^2?)I97_$lHjnr3$Qw&SP7B2r(Ld8*}7`@r8H-#{r6`?Sz=Jtx}#KWZ$C`E=BY z3M^v5%7ufo(4r%pZep>M4U|Aso_R1^!^qSIeUMFl;JY0~Y?!(vEB*As1ND%!=8JZyz;T*_0n~w)Ef#~2K0#`{ zJVD3JfKdFB4mqomm;CzZg*Y?(QAu>Gb;A%x-QZ;q1F*yFotD`XAw-g1Aoi^Ywg_TV zM_7FWh}`~XKQ{L^_wS6pDTsslS&q8n%b)i!zopff8f)Bxadylr-<(>FdSbelqCakg z)H-(!&lN6ft9`g_d}r;Iyzf!tD11IAU-<~z>R~hNc-(yND9O_3eRKssY;Oq6Tf`O+L7hrLCr2L67iM!`QD*S#!9W6D@ zO&X=F95dM)qN0vi4lnsyiTg%-7>Jz0V@DS6$8RM6S(ciU6Z&fqo6_Tv9x zL1!1W$DI0YlPenKrJ-fc-|p4@-hTnq#Nl{JoDn1>g5e)-D+rx#_@ zx9u7056=>#|NM(hTbZDAY|Zdo@ppvA`?I8Gliqyrr(*Lw<5^;WP&x&H>786qdN196#ag7 zDw44%iujXdZB=xxgR_|$x6YYf7uYmyobni5<);wbEf~5pH*AkLi$^09p~Gf)0`ldS zPh~F^uHU+qn}w+1rEt-Y915bAQ24UV5fQdl_no>OA@-ey4ER$KfSZq2xzj zp$m_fAEGA7Pd4|fU8f+enRXFSN5mfq0<>dNcV7wLnNWS>4*|KZ?PTJu8b(&m;7xse ziYq9s+xg_PVdfd$eI^_b<>e9_S9+zAj`h`Z%q7%!IQ#}mS1|T=zTh1xtd-cm;C3ip zoJ7R>pijn-2DJn8E?>ftu|ADQGQ&gOc}cj9uC+5OjtIxLadCbDN$<^dYSujf7xgV) zp_>@)<8e8B(KPZx_f>qKd4!57*TJKD<1_s6N!>UqZ|wj- zBOUGwX>qKWoshRFZahj1T^6PD`#FSTV&T~;drAFz4(};~4CL)BrO%&AYO#4Znf)AO zpVipLc;+@^yURfE#PcG=@ae({4jr4(1I>gccVEdcdUZ*Ag+Fqe;e{E+?`NGZM&QKF z?0HTPT23lvHFRwXkym#&>Y%{Bx9F$={hfZ?30-!U1kfQ@WzWKoyg%8If;}jmo%JSW zn+Gk+;4iB9NzJFrnMc;}no$V-OT*Pj6%sM|n7ltmoIf5Y`_MIJIjNG)*aB2w)%y1y5LC~KHib#l7a{Ox$8&!2ihI9y-+={wXHSl#Nh2j z(VnyMl1BCXT@@aLNRlIj51iI8hYhFBI$qt+&l^8TauuPJpoPQ0SqTEndej?b>hXyb zA~4hI8MHDWP+I%`YK^mrW1~e3=HCF0l+XZS6El3y%HF6D>OC0elLEOvIF(uu@$)t9 zyT4jsm6RcfpNWHm9vF-QU!Qvo4z>8x--w`rgWVnoKH;ZWyD_S&lEl8xSs;if;GZ~D zBIZVgn>R+%Aa<6*Z{&%c#B13HJsPbB9P+p}7_xS|H+?8bb04oL67f%yeR#8N zZjlZiFaf4)QWBfo$uQ)vp29`)lS~G6cH9(*39%U3qbC6b5C|oX?3>qW+^G+FI1<6F zr&W#`_TG^~BEXzY%Lkeltu+zE*OZh@)F<3LS7@zT!r?59&f-Wr!Z~6cFA=Fh{3mbj zu`roK{;)-@p`}dHCnJ0-@CL2uJhX^e2?snq7SQ2}UlBDAtBK0T1Z07zIblp;)Q=RM0aE@h{7p2VSTH!^p!#SQYIbas>QXrW^^QY?# ztJi0{q&4_Gsl`3x>$vvmH0#6 zfz`iFo@HXQjC;~Yi*|J0OzZ2LL>*^)z(=5%<)O7t62GlVM^ef}Wq68@tkTQ1>I>ZG7cU6rgGIOhm%c)6t z5o%{b7x(lPhN?=`kjFEFRg*$j=$D~S(L+=dkLav0ZbalkZ7pV08?^ptrqjgG?|KapLMx98wSZ{E_qCgS@_3;&9@9L z3+FA!I>0ItCqWP?+PVscM(;j9;mgUNvfH(Y#=%K}@CZa2{pK-2Kz~1@i7(S{m)qoI z4a3z6e4eiTwxx#S{fdFemzYjM)fwc89OGC!;#^NP(x7(-THFN`|<(~iMp z8XRGOQ%s4SVgh28Jj$^~hB6DVQ==V;MBJPWzdAm`fI;%!UIyub&r=w7;hwVglRA3| zH`N*HuJjK4v!auc)*0#zasaG~kb=?D!A*C99EgxWz-G|Mm=$=lnWvga60v0pCS_ag z^fWys^!T#p{)0(Nf@u}OOUaeKk0Bn)S3!Ci&nj)PyN8rA&-w->AfE`fTsqJ6$wazi z1G3pLAZJ50wNKli)V8DkX+P&@e3XS-L|hXyY(kCzQ-rmGpP_v zdlmoOH|o8^2CdaePEE@XGezJdvBUyLQhE)uHwJA?Znc^n91AnR@2$n6D4hH=p^+rx;Qd3<_n;#va!D}$ zw22tJt9j!#509D{JqSv_8vulc@90ywEx@fA6jjcB&KoW>+?;_IP?*O7AGc|OIS}zmhpa+RtTjA!7?j#S}X)7TfA0*21*W^`tJ7j%P80n2o}fY5e=%LzkX| zU6bU7*-P34|GPR+@F61g9}YsDK+K*gt8XdoF_^U!CBirSRVydugH22cbA?dwB z?wHF_bD2J>k?TzaJT97m*V4Z+KBzUrQkOLNSYPhWSI8%Pq>M_}yg0L%g4cxaBrjEv zj&bGzZyP`^=Kmr!tm^-B8-ynn2GTPrv;qd94lWb#n*^xytd0k4%6jg|?@N7#i}C;${k56gQubptVYIT6z5mnHu?Iq(b?1mqxgq^LJ;|4Lk%x&c;Z zhOgfS;4W#p88}(m?D@HU3nDLQ2p7g~dX`g-N8ydfdy>8l{=-B0R<=VJ-=+t)VSCES z>*0FTd}hCbbP4A}oOiVuUw(}YF5F@4eb9cQkaFmv0WzVyHUX@0qbuGnGGKMXINT*7YK(3&%cxIGnwuuPQO}EI&7k( z@)_dQ;`>$1mHQ)|4I5`&Z%YOc;mAB0B-rHG0Rs#5k3|(X8g7~0eX`N)fifm93VF5g z2h)8gSgn(VCEV6S17xqu7v0j=(%7~4)G-rC=-c^`dG*FnUe77hcu*|c@46wUfZu1C zi_NFMoT}N6a%;fl`=umO*5y_kGLN{A z=A2A#XAW(MDpkblYOHkw6DOjcDdW-T0RK88#qmEqFYi%j1$<^rOAB-7UKL@DzLwjI zg5yTz8M!^`0M_Gy$2fH6+MoM`S&0F;Q&tw3c*7!9fq4E;)ju z=dTNkI~JO>ZNQJyYTgPXRJPnxU!I?2s?XEoaty4O<4yBV>~jjgrDRRO)a5i%6u4g^CP|-!N&1rR z>htU2HbvCcH3fF@C}|@bbYaHQ+UYy%tRp!gp)@@tI6Bfj$8=|9t8LJgr_b{0ZPw?T zSrfW^2ET9Vxl3@+PHf91F$X?moTjQ+ugA5(SO-0{z7)m7R)Td? z!xK{(n84`vFH)uum1kP*Je%&ln;hByPR59PFwjU%r(-kI4K+XVvv&P;y9=&ON+bF><3n-rJL$`Kni>R{dnvw1)yekBFsBq`4_Xi6>OJGl(xMfT z(`}r*>05aW-WIA_I!PkczKpoZKk=VbS>aWq$|!9KqNs~|tMGRxpIYoAqBP}|lh(ua zhuZY0?c5~xil84E=dzR7!*GI_Qli&{jDJ1;+4eeIM;%%lTb&UHefI&c+=-~+;qMw< zS5#2i&{9H73*emQKzrI@RPXk~s1|=xEF7hzNX7$T2 zWpHw!b*UcARfMc2jy(x+hC8#-v3hWWgVN%%`VcetZHe78MNZ+HKmzs)s6FEN!%e|0 z6kP$*d2w&iIM~B#UEg~l%k2C~D>xlqR26-MFm2Ju>rKZ;a9SWHd_0ruo7cbk5&FQc zewwm$1oY`#z1D=q?~$$J$6{@OJKms75H2zMtrP&tW2<2FuG(&RdKQ|5*8b%@HsXt5 zCc((zeDeH}pefVJ;saF{$Y4 zkNcrxduZP^>J@+bD-hkK{87`a=m;`BbPq3YhE9%C5{a1HjYMw@U|nhWJ!sT!jiy=K z>c_%e3r!(Y@d$2C@D}10Kz|Ym)xp7}841y6Q4%TwY~Dua%(}l&)ZG7i`HUE<2Y1Y& z@PL13*>4F*tvWiU3W||+`Aqy9hUAfJ+~cVlsb`9skd~p|Z=YbiavUKZ3|LcYV1eA$ z?k$E`P;?(t2T{I-8GSz-p&<}Qyd(h_qnyU9e!e&r!+;YX{GN5@TbQu7rv0AT+g$6!A z*iW!RwF?5%(qfM&BLs`np&>#}u0+>fMX8(~hsATt8x@?Zi%IE1!K zat^q)YApdDGb)oUM6%w>ScXrW_DlDV5}eo@qWN32T<>o z?$x<*@dB-PL1ikBlDSi99d@#I7O!xJf;W-0Gqf6NXHf6cY{`-z0kOjL_VbYE*Jyu@ z{|faEls#atzJ+YC$KLNDaFENDcUB=lrMxSBl35$S=@V>0x&LeB^k2fY%F~)3Qa2$domDoa z>T(b6k<$_5Z0+$rb{BdwFeK>=WylK4-S*u7)PlQftnQsg;owKp;ul->%g!pA|DsQMzCbEkW0Z7h>RSnHYjyKu9Qx%u z>u3kn$IkWS{u}&TXsg}HxM%oHALO?8xI&&9MvZ5xa zDQX!oWDOVsv#?R#Y(<_Ga2iKPpV@hRekF~3)8}V&0(NGkdqHs>Bv$MI+-8BHjdfuZ zw!oyAzB;iq{v6rJ=Lk9mR2F7};2y=7Dvi)}1sa-JkZEsaw2DbuZv<-)awiczC{gfSn*l9fmc+(1Hi7`QI<`8niYv(n_qudRa=dXpw8cgWnL}RF z3{x~prrFj74JJvbcKaP2YUUOhg2P)4=-rVq=Rho7V2f$g;L~nfe|pA4D7`|asMWPgvNnw*}P>G8p=l8w;2X)yd=*c0{fpU_rG#Lihx zjVixWx|}NJs&MY6A+$1&KW-0`HO+u`q!=w%6oPZtH1K4It`}`%tp_!0O1(*~IT`0P zM29`zS=^CdTo+Ew+PCZTWFc%fNVPD3A1Bz;4$5jupiZ#F^LGQ=DnoVv&4nF9 zxuUb-75V~d9@G|vYadZAc;2gdnx{(gBwzM2CuLx_!)~amGXCgg1Z*gNy~hMdVtl-^ zRHzZ!RHF)Au-1<<0v!6l!O44Y69hS8UGOCNnd_@;E}U;v(goCF%#HQqrv#-!W(K(} zBZ)l~r zE-P(Y7rujEzYSn&`lf95^tPo1wg1)^3{*kc6!ZAe_lCwl+!=uwmonuNP%rv??8Uil zWWNRnW3Qw8Lqx*@?t?}Fw8j*{NaiMMp6WccEsWU*mCzB=R(-11Qf8;p$-85LBPo;s zBj8V}S3{)XGoPcU7UBTU1{a885RlU`XRp`?{y#NqHI7AHADgJk*MUtQ33JOU=emUj z%6`Ynx*xgX{jA@m7`yhaoKac&f$7Y^r7{9dpW;CN=^g%zQKo$f-=D67)l`>cNYV+<9n~DgY@J7p{4!jpN z=bDx^fg|sYGOKn5nw?&-R!GJ7X{4re&AcViIn`4$h#{&MBsf3y96$65Zy_i0(DECq zyPCH}KVp%`5nP2DDjQ)4RNfHv7p^ZYY;uKOBVFVPJ1i2qMoDv>4Y+xfr`8Ex1LfZU z0F-80TM`?lShcbF=X7pPgyj+$l<`^)5L@w?J|#dzATU7$81sxHaXW3A}#duu1otljOv t|IjV}?@E<;@}I(hJdX;dZz}ognT1U Date: Thu, 5 Oct 2023 13:51:27 -0400 Subject: [PATCH 2/3] changes to the intra-layer API for the GPT benchmark (#36) These changes were made in conjunction with the axonn-ai/Megatron-LM benchmark --- .github/workflows/nvidia-tests.yaml | 19 +++++- axonn/communication.py | 9 --- axonn/intra_layer/communication.py | 8 ++- axonn/intra_layer/fully_connected.py | 88 +++++++++++++++++++++++----- axonn/tests/test_intra_layer_fc.py | 12 ++-- 5 files changed, 104 insertions(+), 32 deletions(-) diff --git a/.github/workflows/nvidia-tests.yaml b/.github/workflows/nvidia-tests.yaml index 7722155..3827c8b 100644 --- a/.github/workflows/nvidia-tests.yaml +++ b/.github/workflows/nvidia-tests.yaml @@ -7,7 +7,7 @@ on: branches: [ develop ] jobs: - mnist-trainer: + inter-layer: runs-on: [ nvidia ] strategy: @@ -28,7 +28,22 @@ jobs: export G_data=$(( 2 / G_inter )) export memopt=${{ matrix.memopt }} echo "training with G_inter = ${G_inter}, G_data = $(( 2 / G_inter )) ${{ matrix.memopt }}" - mpirun -n 2 pytest --with-mpi + mpirun -n 2 pytest --with-mpi ./axonn/tests/test_vit.py + - name: Uninstall AxoNN + run: | + pip uninstall --yes axonn + + + intra-layer: + runs-on: [ nvidia ] + steps: + - uses: actions/checkout@v3 + - name: Install AxoNN + run: | + pip install -r requirements.txt + - name: Run unit intra-layer unit tests + run: | + mpirun -n 2 pytest --with-mpi ./axonn/tests/test_intra_layer_fc.py - name: Uninstall AxoNN run: | pip uninstall --yes axonn diff --git a/axonn/communication.py b/axonn/communication.py index 5cff29b..060f1ef 100644 --- a/axonn/communication.py +++ b/axonn/communication.py @@ -204,12 +204,3 @@ def allreduce(self, tensor, async_op: bool = True): def broadcast_inter_layer(self, tensor, root): mpi4py_compatible_array = self._torch_to_mpi(tensor) self.p2p_mpi_comm.Bcast(mpi4py_compatible_array, root=root) - - def get_tensor_model_parallel_rank(self): - return self.intra_layer_parallel_rank - - def get_tensor_model_parallel_world_size(self): - return self.G_intra - - def get_tensor_model_parallel_group(self): - return self.intra_layer_group diff --git a/axonn/intra_layer/communication.py b/axonn/intra_layer/communication.py index b8143c6..0d3977c 100644 --- a/axonn/intra_layer/communication.py +++ b/axonn/intra_layer/communication.py @@ -3,12 +3,16 @@ def _all_reduce(input_, process_group=None): - dist.all_reduce(input_.contiguous(), group=process_group) + if dist.get_world_size(process_group) > 1: + dist.all_reduce(input_.contiguous(), group=process_group) return input_ def _drop(input_, dim, process_group=None): """Divide a tensor among the tensor parallel ranks""" + if dist.get_world_size(process_group) == 1: + return input_ + total_chunks = dist.get_world_size(process_group) this_chunk = dist.get_rank(process_group) assert input_.shape[dim] % total_chunks == 0 @@ -19,6 +23,8 @@ def _drop(input_, dim, process_group=None): def _gather(input_, dim, process_group=None): """Gather tensors and concatenate them along a dimension""" + if dist.get_world_size(process_group) == 1: + return input_ input_ = input_.contiguous() # Size and dimension. diff --git a/axonn/intra_layer/fully_connected.py b/axonn/intra_layer/fully_connected.py index c04c622..d61e2b4 100644 --- a/axonn/intra_layer/fully_connected.py +++ b/axonn/intra_layer/fully_connected.py @@ -4,8 +4,34 @@ from .communication import ForwardAllReduce, BackwardAllReduce, Drop +def divide(a, b): + assert a % b == 0 + return a // b + + +@torch.no_grad() +def initialize_params( + out_features, in_features, out_features_group, in_features_group, init_method +): + params = torch.empty((out_features, in_features)) + init_method(params) + params = Drop.apply(torch.t(params).contiguous(), out_features_group) + params = torch.t(params).contiguous() + params = Drop.apply(params, in_features_group) + return params + + class Linear(torch.nn.Module): - def __init__(self, in_features, out_features, *args, transpose=False, **kwargs): + def __init__( + self, + in_features, + out_features, + *args, + transpose=False, + skip_bias_add=False, + init_method=None, + **kwargs + ): super(Linear, self).__init__() self.inner_group = ax.comm_handle.inner_intra_layer_parallel_group self.outer_group = ax.comm_handle.outer_intra_layer_parallel_group @@ -16,25 +42,51 @@ def __init__(self, in_features, out_features, *args, transpose=False, **kwargs): if not transpose: assert in_features % self.inner_group_size == 0 assert out_features % self.outer_group_size == 0 - self.local_in_features = in_features // self.inner_group_size - self.linear = torch.nn.Linear( - in_features=in_features // self.inner_group_size, - out_features=out_features // self.outer_group_size, - *args, - **kwargs, - ) + self.local_in_features = divide(in_features, self.inner_group_size) + self.local_out_features = divide(out_features, self.outer_group_size) + if init_method: + initial_params = initialize_params( + out_features, + in_features, + self.outer_group, + self.inner_group, + init_method, + ) else: assert out_features % self.inner_group_size == 0 assert in_features % self.outer_group_size == 0 - self.local_in_features = in_features // self.outer_group_size - self.linear = torch.nn.Linear( - in_features=in_features // self.outer_group_size, - out_features=out_features // self.inner_group_size, - *args, - **kwargs, - ) + self.local_in_features = divide(in_features, self.outer_group_size) + self.local_out_features = divide(out_features, self.inner_group_size) + if init_method: + initial_params = initialize_params( + out_features, + in_features, + self.inner_group, + self.outer_group, + init_method, + ) + self.linear = torch.nn.Linear( + in_features=self.local_in_features, + out_features=self.local_out_features, + *args, + **kwargs, + bias=False + ) + + if init_method: + self.linear.weight.data.copy_(initial_params) + + self.bias = torch.nn.Parameter( + torch.zeros( + self.local_out_features, + ) + ) self.transpose = transpose + self.skip_bias_add = skip_bias_add + + def get_output_feature_size(self): + return self.local_out_features def forward(self, x): if not self.transpose: @@ -49,4 +101,8 @@ def forward(self, x): x = BackwardAllReduce.apply(x, self.inner_group) x = self.linear(x) x = ForwardAllReduce.apply(x, self.outer_group) - return x + + if self.skip_bias_add: + return x, self.bias + else: + return x + self.bias diff --git a/axonn/tests/test_intra_layer_fc.py b/axonn/tests/test_intra_layer_fc.py index 16479f7..7216103 100644 --- a/axonn/tests/test_intra_layer_fc.py +++ b/axonn/tests/test_intra_layer_fc.py @@ -26,11 +26,13 @@ def test_fw_pass(G_intra_r, G_intra_c, B, H): X_local = _drop( X, 1, inner_group ) # divide colunns of X along the inner tensor group - layer = Tensor_Parallel_Linear(in_features=H, out_features=H, bias=False).cuda() + layer = Tensor_Parallel_Linear( + in_features=H, out_features=H, skip_bias_add=True + ).cuda() with torch.no_grad(): # parallel FW pass - Y_local = layer(X_local) + Y_local, _ = layer(X_local) Y_parallel = _gather(Y_local.clone(), 1, outer_group) # sequential FW pass @@ -65,12 +67,14 @@ def test_bw_pass(G_intra_r, G_intra_c, B, H): outer_group = ax.comm_handle.outer_intra_layer_parallel_group # parallel backward pass - layer = Tensor_Parallel_Linear(in_features=H, out_features=H, bias=False).cuda() + layer = Tensor_Parallel_Linear( + in_features=H, out_features=H, skip_bias_add=True + ).cuda() X_local = ( _drop(X, 1, inner_group).detach().clone() ) # divide colunns of X along the inner tensor group X_local.requires_grad = True - Y_local = layer(X_local) + Y_local, _ = layer(X_local) Y_local_grad = _drop(Y_grad, 1, outer_group) Y_local.backward(Y_local_grad) From e44701234c945dcfaac05649978689c5cbe47f5f Mon Sep 17 00:00:00 2001 From: Abhinav Bhatele Date: Tue, 17 Oct 2023 15:42:09 -0400 Subject: [PATCH 3/3] add dependencies between workflows (#41) * QOL changes to the CI - dependency between formatting and nvidia-gpu tests, cancel ongoing tests on push --------- Co-authored-by: Siddharth Singh --- .github/workflows/ci.yaml | 18 ++++++++++++++++++ .../{formatting-tests.yaml => formatting.yaml} | 17 ++++++++--------- ...a-tests.yaml => nvidia-rtx-3090-tests.yaml} | 14 +++++++++----- 3 files changed, 35 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/ci.yaml rename .github/workflows/{formatting-tests.yaml => formatting.yaml} (71%) rename .github/workflows/{nvidia-tests.yaml => nvidia-rtx-3090-tests.yaml} (85%) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..5f32310 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,18 @@ +name: ci + +on: + push: + branches: [ develop ] + pull_request: + branches: [ develop ] + +concurrency: + group: ci-${{github.ref}}-${{github.event.pull_request.number || github.run_number}} + cancel-in-progress: true + +jobs: + formatting: + uses: ./.github/workflows/formatting.yaml + nvidia-gpu: + needs: [ formatting ] + uses: ./.github/workflows/nvidia-rtx-3090-tests.yaml diff --git a/.github/workflows/formatting-tests.yaml b/.github/workflows/formatting.yaml similarity index 71% rename from .github/workflows/formatting-tests.yaml rename to .github/workflows/formatting.yaml index 5d09fa9..7cdac5c 100644 --- a/.github/workflows/formatting-tests.yaml +++ b/.github/workflows/formatting.yaml @@ -1,18 +1,16 @@ name: formatting tests on: - push: - branches: [ develop ] - pull_request: - branches: [ develop ] + workflow_dispatch: + workflow_call: + +concurrency: + group: unit_tests-${{github.ref}}-${{github.event.pull_request.number || github.run_number}} + cancel-in-progress: true jobs: formatting: - runs-on: ${{ matrix.os }} - - strategy: - matrix: - os: [ubuntu-latest, macos-latest] + runs-on: [ubuntu-latest] steps: - uses: actions/checkout@v2 @@ -34,3 +32,4 @@ jobs: run: | black --diff --check . flake8 + diff --git a/.github/workflows/nvidia-tests.yaml b/.github/workflows/nvidia-rtx-3090-tests.yaml similarity index 85% rename from .github/workflows/nvidia-tests.yaml rename to .github/workflows/nvidia-rtx-3090-tests.yaml index 3827c8b..266841c 100644 --- a/.github/workflows/nvidia-tests.yaml +++ b/.github/workflows/nvidia-rtx-3090-tests.yaml @@ -1,10 +1,12 @@ name: nvidia-rtx-3090 tests on: - push: - branches: [ develop ] - pull_request: - branches: [ develop ] + workflow_dispatch: + workflow_call: + +concurrency: + group: unit_tests-${{github.ref}}-${{github.event.pull_request.number || github.run_number}} + cancel-in-progress: true jobs: inter-layer: @@ -13,7 +15,8 @@ jobs: strategy: matrix: ginter: [ 1, 2 ] - memopt: [ '0', '1' ] + memopt: [ '0', '1' ] + steps: - uses: actions/checkout@v3 - name: Install AxoNN @@ -36,6 +39,7 @@ jobs: intra-layer: runs-on: [ nvidia ] + steps: - uses: actions/checkout@v3 - name: Install AxoNN