From 889679a0dd072760f80db043503d45b484062050 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 23 Sep 2023 15:04:34 -0700
Subject: [PATCH] fix exception mapping for streaming

---
 litellm/__pycache__/main.cpython-311.pyc  | Bin 47800 -> 47803 bytes
 litellm/__pycache__/utils.cpython-311.pyc | Bin 123173 -> 129090 bytes
 litellm/llms/replicate.py                 |   6 +-
 litellm/main.py                           |  12 +-
 litellm/tests/test_completion.py          |  18 -
 litellm/tests/test_streaming.py           | 522 +++++++++++++++++++++-
 litellm/utils.py                          | 294 +++++++++---
 pyproject.toml                            |   2 +-
 8 files changed, 760 insertions(+), 94 deletions(-)

diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 3907a3f34d60e36f1d9d9ea0f252baa562caf5d3..b395309b62382bf73a0074cc09517dbae2f7fc57 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ

diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index bf977f9fda2c4b1f68d3c795e880c5d59e633970..5f546c9e34bd598e8ea75fed50e5eeca961e823a 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index e63344492fd0..e7c76d0ef74f 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -77,14 +77,16 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbos
     }
     status = ""
     while True and (status not in ["succeeded", "failed", "canceled"]):
-        time.sleep(0.0001)
+        time.sleep(0.0001) # prevent being rate limited by replicate
         response = requests.get(prediction_url, headers=headers)
         if response.status_code == 200:
             response_data = response.json()
+            status = response_data['status']
+            print(f"response data: {response_data}")
             if "output" in response_data:
                 output_string = "".join(response_data['output'])
                 new_output = output_string[len(previous_output):]
-                yield new_output
+                yield {"output": new_output, "status": status}
                 previous_output = output_string
             status = response_data['status']
diff --git a/litellm/main.py b/litellm/main.py
index 1b2789f75d83..e343553174ac 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -485,11 +485,11 @@ def completion(
         # Setting the relevant API KEY for replicate, replicate defaults to using os.environ.get("REPLICATE_API_TOKEN")
         replicate_key = None
         replicate_key = (
-            get_secret("REPLICATE_API_KEY")
-            or get_secret("REPLICATE_API_TOKEN")
-            or api_key
+            api_key
             or litellm.replicate_key
-            or litellm.api_key
+            or litellm.api_key
+            or get_secret("REPLICATE_API_KEY")
+            or get_secret("REPLICATE_API_TOKEN")
         )

         model_response = replicate.completion(
@@ -575,7 +575,7 @@ def completion(
         if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object,
-            response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph-alpha", logging_obj=logging)
+            response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph_alpha", logging_obj=logging)
             return response
         response = model_response
     elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
@@ -769,7 +769,7 @@ def completion(
         if stream:
             model_response = chat.send_message_streaming(prompt, **optional_params)
             response = CustomStreamWrapper(
-                model_response, model, custom_llm_provider="vertexai", logging_obj=logging
+                model_response, model, custom_llm_provider="vertex_ai", logging_obj=logging
             )
             return response
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index da5994ccdd17..980aa14f2415 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -643,24 +643,6 @@ def test_completion_sagemaker():
 # test_completion_sagemaker()
 
-def test_completion_sagemaker_stream():
-    litellm.set_verbose = False
-    try:
-        response = completion(
-            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
-            messages=messages,
-            temperature=0.2,
-            max_tokens=80,
-            stream=True,
-        )
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk)
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_sagemaker_stream()
-
 def test_completion_bedrock_titan():
     try:
         response = completion(
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index
10f772c25028..495630300a96 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -9,7 +9,7 @@ 0, os.path.abspath("../..") ) # Adds the parent directory to the system path import litellm -from litellm import completion, acompletion +from litellm import completion, acompletion, AuthenticationError, InvalidRequestError litellm.logging = False litellm.set_verbose = False @@ -187,6 +187,7 @@ def streaming_format_tests(idx, chunk): finished = True if "content" in chunk["choices"][0]["delta"]: extracted_chunk = chunk["choices"][0]["delta"]["content"] + print(f"extracted chunk: {extracted_chunk}") return extracted_chunk, finished def test_completion_cohere_stream(): @@ -199,21 +200,120 @@ def test_completion_cohere_stream(): }, ] response = completion( - model="command-nightly", messages=messages, stream=True, max_tokens=50 + model="command-nightly", messages=messages, stream=True, max_tokens=50, ) complete_response = "" # Add any assertions here to check the response + has_finish_reason = False for idx, chunk in enumerate(response): chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished if finished: break complete_response += chunk + if has_finish_reason is False: + raise Exception("Finish reason not in final chunk") if complete_response.strip() == "": raise Exception("Empty response received") print(f"completion_response: {complete_response}") except Exception as e: pytest.fail(f"Error occurred: {e}") +# test_completion_cohere_stream() + +def test_completion_cohere_stream_bad_key(): + try: + api_key = "bad-key" + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "how does a court case get to the Supreme Court?", + }, + ] + response = completion( + model="command-nightly", messages=messages, stream=True, max_tokens=50, api_key=api_key + ) + complete_response = "" + # Add any assertions here to check the response + has_finish_reason = False + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + if finished: + break + complete_response += chunk + if has_finish_reason is False: + raise Exception("Finish reason not in final chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except AuthenticationError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +# test_completion_cohere_stream_bad_key() + +# def test_completion_nlp_cloud(): +# try: +# messages = [ +# {"role": "system", "content": "You are a helpful assistant."}, +# { +# "role": "user", +# "content": "how does a court case get to the Supreme Court?", +# }, +# ] +# response = completion(model="dolphin", messages=messages, stream=True) +# complete_response = "" +# # Add any assertions here to check the response +# has_finish_reason = False +# for idx, chunk in enumerate(response): +# chunk, finished = streaming_format_tests(idx, chunk) +# has_finish_reason = finished +# complete_response += chunk +# if finished: +# break +# if has_finish_reason is False: +# raise Exception("Finish reason not in final chunk") +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# print(f"completion_response: {complete_response}") +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_nlp_cloud() + +# def test_completion_nlp_cloud_bad_key(): +# try: +# api_key = "bad-key" +# messages = [ 
+# {"role": "system", "content": "You are a helpful assistant."}, +# { +# "role": "user", +# "content": "how does a court case get to the Supreme Court?", +# }, +# ] +# response = completion(model="dolphin", messages=messages, stream=True, api_key=api_key) +# complete_response = "" +# # Add any assertions here to check the response +# has_finish_reason = False +# for idx, chunk in enumerate(response): +# chunk, finished = streaming_format_tests(idx, chunk) +# has_finish_reason = finished +# complete_response += chunk +# if finished: +# break +# if has_finish_reason is False: +# raise Exception("Finish reason not in final chunk") +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# print(f"completion_response: {complete_response}") +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_nlp_cloud_bad_key() + # def test_completion_hf_stream(): # try: # messages = [ @@ -235,10 +335,41 @@ def test_completion_cohere_stream(): # if complete_response.strip() == "": # raise Exception("Empty response received") # print(f"completion_response: {complete_response}") +# except InvalidRequestError as e: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# # test_completion_hf_stream() + +# def test_completion_hf_stream_bad_key(): +# try: +# api_key = "bad-key" +# messages = [ +# { +# "content": "Hello! How are you today?", +# "role": "user" +# }, +# ] +# response = completion( +# model="huggingface/meta-llama/Llama-2-7b-chat-hf", messages=messages, api_base="https://a8l9e3ucxinyl3oj.us-east-1.aws.endpoints.huggingface.cloud", stream=True, max_tokens=1000, api_key=api_key +# ) +# complete_response = "" +# # Add any assertions here to check the response +# for idx, chunk in enumerate(response): +# chunk, finished = streaming_format_tests(idx, chunk) +# if finished: +# break +# complete_response += chunk +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# print(f"completion_response: {complete_response}") +# except InvalidRequestError as e: +# pass # except Exception as e: # pytest.fail(f"Error occurred: {e}") -# test_completion_hf_stream() +# test_completion_hf_stream_bad_key() def test_completion_claude_stream(): try: @@ -266,9 +397,202 @@ def test_completion_claude_stream(): pytest.fail(f"Error occurred: {e}") # test_completion_claude_stream() + +def test_completion_claude_stream_bad_key(): + try: + api_key = "bad-key" + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "how does a court case get to the Supreme Court?", + }, + ] + response = completion( + model="claude-instant-1", messages=messages, stream=True, max_tokens=50, api_key=api_key + ) + complete_response = "" + # Add any assertions here to check the response + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +# test_completion_claude_stream_bad_key() + +def test_completion_replicate_stream(): + try: + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "how does a court case get to the Supreme Court?", + }, + ] + response = completion( + 
model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50 + ) + complete_response = "" + has_finish_reason = False + # Add any assertions here to check the response + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + if finished: + break + complete_response += chunk + if has_finish_reason is False: + raise Exception("finish reason not set for last chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except InvalidRequestError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") +# test_completion_replicate_stream() + +# def test_completion_vertexai_stream(): +# try: +# import os +# os.environ["VERTEXAI_PROJECT"] = "pathrise-convert-1606954137718" +# os.environ["VERTEXAI_LOCATION"] = "us-central1" +# messages = [ +# {"role": "system", "content": "You are a helpful assistant."}, +# { +# "role": "user", +# "content": "how does a court case get to the Supreme Court?", +# }, +# ] +# response = completion( +# model="vertex_ai/chat-bison", messages=messages, stream=True, max_tokens=50 +# ) +# complete_response = "" +# has_finish_reason = False +# # Add any assertions here to check the response +# for idx, chunk in enumerate(response): +# chunk, finished = streaming_format_tests(idx, chunk) +# has_finish_reason = finished +# if finished: +# break +# complete_response += chunk +# if has_finish_reason is False: +# raise Exception("finish reason not set for last chunk") +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# print(f"completion_response: {complete_response}") +# except InvalidRequestError as e: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_vertexai_stream() + + +# def test_completion_vertexai_stream_bad_key(): +# try: +# import os +# messages = [ +# {"role": "system", "content": "You are a helpful assistant."}, +# { +# "role": "user", +# "content": "how does a court case get to the Supreme Court?", +# }, +# ] +# response = completion( +# model="vertex_ai/chat-bison", messages=messages, stream=True, max_tokens=50 +# ) +# complete_response = "" +# has_finish_reason = False +# # Add any assertions here to check the response +# for idx, chunk in enumerate(response): +# chunk, finished = streaming_format_tests(idx, chunk) +# has_finish_reason = finished +# if finished: +# break +# complete_response += chunk +# if has_finish_reason is False: +# raise Exception("finish reason not set for last chunk") +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# print(f"completion_response: {complete_response}") +# except InvalidRequestError as e: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_vertexai_stream_bad_key() + +def test_completion_replicate_stream(): + try: + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "how does a court case get to the Supreme Court?", + }, + ] + response = completion( + model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50 + ) + complete_response = "" + has_finish_reason = False + # Add any assertions here to check the response + for idx, chunk in enumerate(response): 
+ chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + if finished: + break + complete_response += chunk + if has_finish_reason is False: + raise Exception("finish reason not set for last chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except InvalidRequestError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +def test_completion_replicate_stream_bad_key(): + try: + api_key = "bad-key" + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "how does a court case get to the Supreme Court?", + }, + ] + response = completion( + model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50, api_key=api_key + ) + complete_response = "" + # Add any assertions here to check the response + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except InvalidRequestError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +# test_completion_replicate_stream_bad_key() + def test_completion_bedrock_ai21_stream(): try: - litellm.set_verbose = False response = completion( model="bedrock/amazon.titan-tg1-large", messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}], @@ -276,21 +600,83 @@ def test_completion_bedrock_ai21_stream(): max_tokens=4096, stream=True, ) - complete_response = "" - # Add any assertions here to check the response - print(response) + complete_response = "" + has_finish_reason = False + # Add any assertions here to check the response + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + complete_response += chunk + if finished: + break + if has_finish_reason is False: + raise Exception("finish reason not set for last chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +# test_completion_bedrock_ai21_stream() + +def test_completion_bedrock_ai21_stream_bad_key(): + try: + response = completion( + model="bedrock/amazon.titan-tg1-large", + messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}], + temperature=1, + max_tokens=4096, + stream=True, + ) + complete_response = "" + has_finish_reason = False + # Add any assertions here to check the response for idx, chunk in enumerate(response): chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished if finished: break complete_response += chunk + if has_finish_reason is False: + raise Exception("finish reason not set for last chunk") if complete_response.strip() == "": raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except InvalidRequestError as e: + pass except Exception as e: pytest.fail(f"Error occurred: {e}") +# test_completion_bedrock_ai21_stream_bad_key() -# 
test_completion_cohere_stream() +def test_completion_sagemaker_stream(): + try: + response = completion( + model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b", + messages=messages, + temperature=0.2, + max_tokens=80, + stream=True, + ) + complete_response = "" + has_finish_reason = False + # Add any assertions here to check the response + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + if finished: + break + complete_response += chunk + if has_finish_reason is False: + raise Exception("finish reason not set for last chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + except InvalidRequestError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +test_completion_sagemaker_stream() # test on openai completion call def test_openai_text_completion_call(): @@ -314,16 +700,20 @@ def test_openai_text_completion_call(): def ai21_completion_call(): try: response = completion( - model="j2-ultra", messages=messages, stream=True, logger_fn=logger_fn + model="j2-ultra", messages=messages, stream=True ) print(f"response: {response}") + has_finished = False complete_response = "" start_time = time.time() for idx, chunk in enumerate(response): chunk, finished = streaming_format_tests(idx, chunk) + has_finished = finished + complete_response += chunk if finished: break - complete_response += chunk + if has_finished is False: + raise Exception("finished reason missing from final chunk") if complete_response.strip() == "": raise Exception("Empty response received") print(f"completion_response: {complete_response}") @@ -331,6 +721,82 @@ def ai21_completion_call(): pytest.fail(f"error occurred: {traceback.format_exc()}") # ai21_completion_call() + +def ai21_completion_call_bad_key(): + try: + api_key = "bad-key" + response = completion( + model="j2-ultra", messages=messages, stream=True, api_key=api_key + ) + print(f"response: {response}") + complete_response = "" + start_time = time.time() + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except InvalidRequestError as e: + pass + except: + pytest.fail(f"error occurred: {traceback.format_exc()}") + +# ai21_completion_call_bad_key() + +def test_completion_aleph_alpha(): + try: + response = completion( + model="luminous-base", messages=messages, stream=True + ) + # Add any assertions here to check the response + has_finished = False + complete_response = "" + start_time = time.time() + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finished = finished + complete_response += chunk + if finished: + break + if has_finished is False: + raise Exception("finished reason missing from final chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +# test_completion_aleph_alpha() + +# def test_completion_aleph_alpha_bad_key(): +# try: +# api_key = "bad-key" +# response = completion( +# model="luminous-base", messages=messages, stream=True, api_key=api_key +# ) +# # Add any assertions here to check the response +# has_finished = False +# complete_response = "" +# start_time = time.time() +# for idx, chunk in enumerate(response): +# 
chunk, finished = streaming_format_tests(idx, chunk) +# has_finished = finished +# complete_response += chunk +# if finished: +# break +# if has_finished is False: +# raise Exception("finished reason missing from final chunk") +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# except InvalidRequestError as e: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_aleph_alpha_bad_key() + # test on openai completion call def test_openai_chat_completion_call(): try: @@ -366,11 +832,15 @@ def test_together_ai_completion_call_starcoder(): ) complete_response = "" print(f"returned response object: {response}") + has_finish_reason = False for idx, chunk in enumerate(response): chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished if finished: break complete_response += chunk + if has_finish_reason is False: + raise Exception("Finish reason not set for last chunk") if complete_response == "": raise Exception("Empty response received") print(f"complete response: {complete_response}") @@ -378,6 +848,38 @@ def test_together_ai_completion_call_starcoder(): print(f"error occurred: {traceback.format_exc()}") pass +# test_together_ai_completion_call_starcoder() + +def test_together_ai_completion_call_starcoder_bad_key(): + try: + api_key = "bad-key" + start_time = time.time() + response = completion( + model="together_ai/bigcode/starcoder", + messages=messages, + stream=True, + api_key=api_key + ) + complete_response = "" + has_finish_reason = False + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + if finished: + break + complete_response += chunk + if has_finish_reason is False: + raise Exception("Finish reason not set for last chunk") + if complete_response == "": + raise Exception("Empty response received") + print(f"complete response: {complete_response}") + except InvalidRequestError as e: + pass + except: + print(f"error occurred: {traceback.format_exc()}") + pass + +# test_together_ai_completion_call_starcoder_bad_key() #### Test Function calling + streaming #### def test_completion_openai_with_functions(): diff --git a/litellm/utils.py b/litellm/utils.py index 0d2ce8c9587c..046c82cf192a 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2,6 +2,7 @@ import dotenv, json, traceback, threading import subprocess, os import litellm, openai +import itertools import random, uuid, requests import datetime, time import tiktoken @@ -1915,7 +1916,6 @@ def exception_type( ): global user_logger_fn, liteDebuggerClient exception_mapping_worked = False - if litellm.set_verbose == True: litellm.error_logs['EXCEPTION'] = original_exception litellm.error_logs['KWARGS'] = completion_kwargs @@ -1970,7 +1970,7 @@ def exception_type( exception_type = type(original_exception).__name__ else: exception_type = "" - if "claude" in model: # one of the anthropics + if custom_llm_provider == "anthropic": # one of the anthropics if hasattr(original_exception, "message"): if "prompt is too long" in original_exception.message: exception_mapping_worked = True @@ -1979,6 +1979,13 @@ def exception_type( model=model, llm_provider="anthropic" ) + if "Invalid API Key" in original_exception.message: + exception_mapping_worked = True + raise AuthenticationError( + message=original_exception.message, + model=model, + llm_provider="anthropic" + ) if hasattr(original_exception, "status_code"): print_verbose(f"status_code: 
{original_exception.status_code}") if original_exception.status_code == 401: @@ -2031,7 +2038,7 @@ def exception_type( llm_provider="anthropic", model=model ) - elif "replicate" in model: + elif custom_llm_provider == "replicate": if "Incorrect authentication token" in error_str: exception_mapping_worked = True raise AuthenticationError( @@ -2068,7 +2075,7 @@ def exception_type( llm_provider="replicate", model=model ) - elif original_exception.status_code == 400: + elif original_exception.status_code == 400 or original_exception.status_code == 422: exception_mapping_worked = True raise InvalidRequestError( message=f"ReplicateException - {original_exception.message}", @@ -2110,7 +2117,31 @@ def exception_type( llm_provider="replicate", model=model ) - elif model in litellm.cohere_models or custom_llm_provider == "cohere": # Cohere + elif custom_llm_provider == "bedrock": + if "Unable to locate credentials" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"BedrockException - {error_str}", + model=model, + llm_provider="bedrock" + ) + elif custom_llm_provider == "sagemaker": + if "Unable to locate credentials" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"SagemakerException - {error_str}", + model=model, + llm_provider="sagemaker" + ) + elif custom_llm_provider == "vertex_ai": + if "Vertex AI API has not been used in project" in error_str or "Unable to find your project" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai" + ) + elif custom_llm_provider == "cohere": # Cohere if ( "invalid api token" in error_str or "No API key provided." in error_str @@ -2184,6 +2215,13 @@ def exception_type( model=model, llm_provider="huggingface" ) + elif "A valid user token is required" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=error_str, + llm_provider="huggingface", + model=model + ) if hasattr(original_exception, "status_code"): if original_exception.status_code == 401: exception_mapping_worked = True @@ -2221,6 +2259,8 @@ def exception_type( llm_provider="huggingface", model=model ) + exception_mapping_worked = True + raise APIError(status_code=500, message=error_str, model=model, llm_provider=custom_llm_provider) elif custom_llm_provider == "ai21": if hasattr(original_exception, "message"): if "Prompt has too many tokens" in original_exception.message: @@ -2230,6 +2270,13 @@ def exception_type( model=model, llm_provider="ai21" ) + if "Bad or missing API token." 
in original_exception.message: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"AI21Exception - {original_exception.message}", + model=model, + llm_provider="ai21" + ) if hasattr(original_exception, "status_code"): if original_exception.status_code == 401: exception_mapping_worked = True @@ -2266,7 +2313,7 @@ def exception_type( llm_provider="ai21", model=model ) - elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud": + elif custom_llm_provider == "nlp_cloud": if "detail" in error_str: if "Input text length should not exceed" in error_str: exception_mapping_worked = True @@ -2342,6 +2389,7 @@ def exception_type( model=model ) elif custom_llm_provider == "together_ai": + import json error_response = json.loads(error_str) if "error" in error_response and "`inputs` tokens + `max_new_tokens` must be <=" in error_response["error"]: exception_mapping_worked = True @@ -2364,6 +2412,13 @@ def exception_type( model=model, llm_provider="together_ai" ) + elif "error" in error_response and "API key doesn't match expected format." in error_response["error"]: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"TogetherAIException - {error_response['error']}", + model=model, + llm_provider="together_ai" + ) elif "error_type" in error_response and error_response["error_type"] == "validation": exception_mapping_worked = True raise InvalidRequestError( @@ -2393,7 +2448,7 @@ def exception_type( llm_provider="together_ai", model=model ) - elif model in litellm.aleph_alpha_models: + elif custom_llm_provider == "aleph_alpha": if "This is longer than the model's maximum context length" in error_str: exception_mapping_worked = True raise ContextWindowExceededError( @@ -2401,6 +2456,13 @@ def exception_type( llm_provider="aleph_alpha", model=model ) + elif "InvalidToken" in error_str or "No token provided" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"AlephAlphaException - {original_exception.message}", + llm_provider="aleph_alpha", + model=model + ) elif hasattr(original_exception, "status_code"): print(f"status code: {original_exception.status_code}") if original_exception.status_code == 401: @@ -2445,7 +2507,8 @@ def exception_type( elif custom_llm_provider == "ollama": if "no attribute 'async_get_ollama_response_stream" in error_str: raise ImportError("Import error - trying to use async for ollama. import async_generator failed. 
Try 'pip install async_generator'") - raise original_exception + exception_mapping_worked = True + raise APIError(status_code=500, message=str(original_exception), llm_provider=custom_llm_provider, model=model) except Exception as e: # LOGGING exception_logging( @@ -2563,6 +2626,7 @@ def __init__(self, completion_stream, model, custom_llm_provider=None, logging_o self.logging_obj = logging_obj self.completion_stream = completion_stream self.sent_first_chunk = False + self.sent_last_chunk = False if self.logging_obj: # Log the type of the received item self.logging_obj.post_call(str(type(completion_stream))) @@ -2579,41 +2643,71 @@ def logging(self, text): def handle_anthropic_chunk(self, chunk): str_line = chunk.decode("utf-8") # Convert bytes to string + print(f"str_line: {str_line}") + text = "" + is_finished = False + finish_reason = None if str_line.startswith("data:"): data_json = json.loads(str_line[5:]) - return data_json.get("completion", "") - return "" + text = data_json.get("completion", "") + if data_json.get("stop_reason", None): + is_finished = True + finish_reason = data_json["stop_reason"] + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} + elif "error" in str_line: + raise ValueError(f"Unable to parse response. Original response: {str_line}") + else: + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} def handle_together_ai_chunk(self, chunk): chunk = chunk.decode("utf-8") - text_index = chunk.find('"text":"') # this checks if text: exists - text_start = text_index + len('"text":"') - text_end = chunk.find('"}', text_start) - if text_index != -1 and text_end != -1: - extracted_text = chunk[text_start:text_end] - return extracted_text + text = "" + is_finished = False + finish_reason = None + if "text" in chunk: + text_index = chunk.find('"text":"') # this checks if text: exists + text_start = text_index + len('"text":"') + text_end = chunk.find('"}', text_start) + if text_index != -1 and text_end != -1: + extracted_text = chunk[text_start:text_end] + text = extracted_text + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} + elif "[DONE]" in chunk: + return {"text": text, "is_finished": True, "finish_reason": "stop"} + elif "error" in chunk: + raise ValueError(chunk) else: - return "" + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} def handle_huggingface_chunk(self, chunk): chunk = chunk.decode("utf-8") + text = "" + is_finished = False + finish_reason = "" if chunk.startswith("data:"): data_json = json.loads(chunk[5:]) + print(f"data json: {data_json}") if "token" in data_json and "text" in data_json["token"]: text = data_json["token"]["text"] if "meta-llama/Llama-2" in self.model: #clean eos tokens like from the returned output text if any(token in text for token in llama_2_special_tokens): text = text.replace("", "").replace("", "") - return text - else: - return "" - return "" + if data_json.get("details", False) and data_json["details"].get("finish_reason", False): + is_finished = True + finish_reason = data_json["details"]["finish_reason"] + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} + elif "error" in chunk: + raise ValueError(chunk) + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} - def handle_ai21_chunk(self, chunk): + def handle_ai21_chunk(self, chunk): # fake streaming chunk = chunk.decode("utf-8") data_json = json.loads(chunk) try: - return 
data_json["completions"][0]["data"]["text"] + text = data_json["completions"][0]["data"]["text"] + is_finished = True + finish_reason = "stop" + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} except: raise ValueError(f"Unable to parse response. Original response: {chunk}") @@ -2621,8 +2715,10 @@ def handle_nlp_cloud_chunk(self, chunk): chunk = chunk.decode("utf-8") data_json = json.loads(chunk) try: - print(f"data json: {data_json}") - return data_json["generated_text"] + text = data_json["generated_text"] + is_finished = True + finish_reason = "stop" + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} except: raise ValueError(f"Unable to parse response. Original response: {chunk}") @@ -2630,7 +2726,10 @@ def handle_aleph_alpha_chunk(self, chunk): chunk = chunk.decode("utf-8") data_json = json.loads(chunk) try: - return data_json["completions"][0]["completion"] + text = data_json["completions"][0]["completion"] + is_finished = True + finish_reason = "stop" + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} except: raise ValueError(f"Unable to parse response. Original response: {chunk}") @@ -2638,7 +2737,35 @@ def handle_cohere_chunk(self, chunk): chunk = chunk.decode("utf-8") data_json = json.loads(chunk) try: - return data_json["text"] + text = "" + is_finished = False + finish_reason = "" + if "text" in data_json: + text = data_json["text"] + elif "is_finished" in data_json: + is_finished = data_json["is_finished"] + finish_reason = data_json["finish_reason"] + else: + raise Exception(data_json) + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} + except: + raise ValueError(f"Unable to parse response. Original response: {chunk}") + + def handle_replicate_chunk(self, chunk): + print(f"chunk: {chunk}") + try: + text = "" + is_finished = False + finish_reason = "" + if "output" in chunk: + text = chunk['output'] + if "status" in chunk: + if chunk["status"] == "succeeded": + is_finished = True + finish_reason = "stop" + elif chunk.get("error", None): + raise Exception(chunk["error"]) + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} except: raise ValueError(f"Unable to parse response. 
Original response: {chunk}") @@ -2683,13 +2810,21 @@ def handle_baseten_chunk(self, chunk): traceback.print_exc() return "" - def handle_bedrock_stream(self): - if self.completion_stream: - event = next(self.completion_stream) - chunk = event.get('chunk') - if chunk: - chunk_data = json.loads(chunk.get('bytes').decode()) - return chunk_data['outputText'] + def handle_bedrock_stream(self, chunk): + chunk = chunk.get('chunk') + if chunk: + chunk_data = json.loads(chunk.get('bytes').decode()) + text = "" + is_finished = False + finish_reason = "" + if "outputText" in chunk_data: + text = chunk_data['outputText'] + if chunk_data.get("completionReason", None): + is_finished = True + finish_reason = chunk_data["completionReason"] + elif chunk.get("error", None): + raise Exception(chunk["error"]) + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} return "" ## needs to handle the empty string case (even starting chunk can be an empty string) @@ -2701,49 +2836,94 @@ def __next__(self): completion_obj = {"content": ""} if self.custom_llm_provider and self.custom_llm_provider == "anthropic": chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_anthropic_chunk(chunk) + response_obj = self.handle_anthropic_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.model == "replicate" or self.custom_llm_provider == "replicate": chunk = next(self.completion_stream) - completion_obj["content"] = chunk + response_obj = self.handle_replicate_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif ( self.custom_llm_provider and self.custom_llm_provider == "together_ai"): chunk = next(self.completion_stream) - text_data = self.handle_together_ai_chunk(chunk) - if text_data == "": - return self.__next__() - completion_obj["content"] = text_data + response_obj = self.handle_together_ai_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider and self.custom_llm_provider == "huggingface": chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_huggingface_chunk(chunk) + response_obj = self.handle_huggingface_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider and self.custom_llm_provider == "baseten": # baseten doesn't provide streaming chunk = next(self.completion_stream) completion_obj["content"] = self.handle_baseten_chunk(chunk) elif self.custom_llm_provider and self.custom_llm_provider == "ai21": #ai21 doesn't provide streaming chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_ai21_chunk(chunk) + response_obj = self.handle_ai21_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider and self.custom_llm_provider == "vllm": chunk = next(self.completion_stream) completion_obj["content"] = chunk[0].outputs[0].text - elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming 
+ elif self.custom_llm_provider and self.custom_llm_provider == "aleph_alpha": #aleph alpha doesn't provide streaming chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk) + response_obj = self.handle_aleph_alpha_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai": chunk = next(self.completion_stream) completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk) elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud": - chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_nlp_cloud_chunk(chunk) - elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models): - chunk = next(self.completion_stream) - completion_obj["content"] = str(chunk) + try: + chunk = next(self.completion_stream) + response_obj = self.handle_nlp_cloud_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] + except Exception as e: + if self.sent_last_chunk: + raise e + else: + if self.sent_first_chunk is False: + raise Exception("An unknown error occurred with the stream") + model_response.choices[0].finish_reason = "stop" + self.sent_last_chunk = True + elif self.custom_llm_provider and self.custom_llm_provider == "vertex_ai": + try: + chunk = next(self.completion_stream) + completion_obj["content"] = str(chunk) + except StopIteration as e: + if self.sent_last_chunk: + raise e + else: + model_response.choices[0].finish_reason = "stop" + self.sent_last_chunk = True elif self.custom_llm_provider == "cohere": chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_cohere_chunk(chunk) + response_obj = self.handle_cohere_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider == "bedrock": - completion_obj["content"] = self.handle_bedrock_stream() + chunk = next(self.completion_stream) + response_obj = self.handle_bedrock_stream(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider == "sagemaker": if len(self.completion_stream)==0: - raise StopIteration + if self.sent_last_chunk: + raise StopIteration + else: + model_response.choices[0].finish_reason = "stop" + self.sent_last_chunk = True chunk_size = 30 new_chunk = self.completion_stream[:chunk_size] completion_obj["content"] = new_chunk @@ -2765,11 +2945,13 @@ def __next__(self): self.sent_first_chunk = True model_response.choices[0].delta = Delta(**completion_obj) return model_response + elif model_response.choices[0].finish_reason: + return model_response except StopIteration: raise StopIteration - except Exception as e: - model_response.choices[0].finish_reason = "stop" - return model_response + except Exception as e: + e.message = str(e) + return exception_type(model=self.model, custom_llm_provider=self.custom_llm_provider, original_exception=e) async def __anext__(self): try: @@ -2796,7 +2978,6 @@ def read_config_args(config_path) -> dict: # read 
keys/ values from config file and return them return config except Exception as e: - print("An error occurred while reading config:", str(e)) raise e ########## experimental completion variants ############################ @@ -2899,7 +3080,6 @@ def get_model_split_test(models, completion_call_id): try: # make the api call last_fetched_at = time.time() - print(f"last_fetched_at: {last_fetched_at}") response = requests.post( #http://api.litellm.ai url="http://api.litellm.ai/get_model_split_test", # get the updated dict from table or update the table with the dict diff --git a/pyproject.toml b/pyproject.toml index bf0f0097c76d..80b3eb99d1f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.738" +version = "0.1.739" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License"
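
Usage sketch (illustrative only, not part of the patch): after this change, the final streamed chunk carries a finish_reason and auth failures surface as litellm's mapped exception types, so a caller can drive the loop the same way the tests above do. The model name and message mirror the tests in this patch; a valid provider API key is assumed to be configured in the environment.

from litellm import completion, AuthenticationError

messages = [{"role": "user", "content": "how does a court case get to the Supreme Court?"}]

try:
    response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50)
    complete_response = ""
    for chunk in response:
        # chunks are OpenAI-shaped; with this patch the last chunk sets finish_reason
        if chunk["choices"][0]["finish_reason"]:
            break
        if "content" in chunk["choices"][0]["delta"]:
            complete_response += chunk["choices"][0]["delta"]["content"]
    print(complete_response)
except AuthenticationError:
    # a bad key is now raised as a mapped litellm exception instead of a raw provider error
    pass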