From 889679a0dd072760f80db043503d45b484062050 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 23 Sep 2023 15:04:34 -0700
Subject: [PATCH] fix exception mapping for streaming

---
 litellm/__pycache__/main.cpython-311.pyc  | Bin 47800 -> 47803 bytes
 litellm/__pycache__/utils.cpython-311.pyc | Bin 123173 -> 129090 bytes
 litellm/llms/replicate.py                 |   6 +-
 litellm/main.py                           |  12 +-
 litellm/tests/test_completion.py          |  18 -
 litellm/tests/test_streaming.py           | 522 +++++++++++++++++++++-
 litellm/utils.py                          | 294 +++++++++---
 pyproject.toml                            |   2 +-
 8 files changed, 760 insertions(+), 94 deletions(-)

diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 3907a3f34d60e36f1d9d9ea0f252baa562caf5d3..b395309b62382bf73a0074cc09517dbae2f7fc57 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ

diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index bf977f9fda2c4b1f68d3c795e880c5d59e633970..5f546c9e34bd598e8ea75fed50e5eeca961e823a 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index e63344492fd0..e7c76d0ef74f 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -77,14 +77,16 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbos
     }
     status = ""
     while True and (status not in ["succeeded", "failed", "canceled"]):
-        time.sleep(0.0001)
+        time.sleep(0.0001) # prevent being rate limited by replicate
         response = requests.get(prediction_url, headers=headers)
         if response.status_code == 200:
             response_data = response.json()
+            status = response_data['status']
+            print(f"response data: {response_data}")
             if "output" in response_data:
                 output_string = "".join(response_data['output'])
                 new_output = output_string[len(previous_output):]
-                yield new_output
+                yield {"output": new_output, "status": status}
                 previous_output = output_string
             status = response_data['status']
diff --git a/litellm/main.py b/litellm/main.py
index 1b2789f75d83..e343553174ac 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -485,11 +485,11 @@ def completion(
         # Setting the relevant API KEY for replicate, replicate defaults to using os.environ.get("REPLICATE_API_TOKEN")
         replicate_key = None
         replicate_key = (
-            get_secret("REPLICATE_API_KEY")
-            or get_secret("REPLICATE_API_TOKEN")
-            or api_key
+            api_key
             or litellm.replicate_key
-            or litellm.api_key
+            or litellm.api_key
+            or get_secret("REPLICATE_API_KEY")
+            or get_secret("REPLICATE_API_TOKEN")
         )

         model_response = replicate.completion(
@@ -575,7 +575,7 @@ def completion(
         if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object,
-            response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph-alpha", logging_obj=logging)
+            response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph_alpha", logging_obj=logging)
             return response
         response = model_response
     elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
@@ -769,7 +769,7 @@ def completion(
         if stream:
             model_response = chat.send_message_streaming(prompt, **optional_params)
             response = CustomStreamWrapper(
-                model_response, model, custom_llm_provider="vertexai", logging_obj=logging
+                model_response, model, custom_llm_provider="vertex_ai", logging_obj=logging
             )
             return response
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index da5994ccdd17..980aa14f2415 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -643,24 +643,6 @@ def test_completion_sagemaker():
 # test_completion_sagemaker()
 
-def test_completion_sagemaker_stream():
-    litellm.set_verbose = False
-    try:
-        response = completion(
-            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
-            messages=messages,
-            temperature=0.2,
-            max_tokens=80,
-            stream=True,
-        )
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk)
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_sagemaker_stream()
-
 def test_completion_bedrock_titan():
     try:
         response = completion(
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index
10f772c25028..495630300a96 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -9,7 +9,7 @@ 0, os.path.abspath("../..") ) # Adds the parent directory to the system path import litellm -from litellm import completion, acompletion +from litellm import completion, acompletion, AuthenticationError, InvalidRequestError litellm.logging = False litellm.set_verbose = False @@ -187,6 +187,7 @@ def streaming_format_tests(idx, chunk): finished = True if "content" in chunk["choices"][0]["delta"]: extracted_chunk = chunk["choices"][0]["delta"]["content"] + print(f"extracted chunk: {extracted_chunk}") return extracted_chunk, finished def test_completion_cohere_stream(): @@ -199,21 +200,120 @@ def test_completion_cohere_stream(): }, ] response = completion( - model="command-nightly", messages=messages, stream=True, max_tokens=50 + model="command-nightly", messages=messages, stream=True, max_tokens=50, ) complete_response = "" # Add any assertions here to check the response + has_finish_reason = False for idx, chunk in enumerate(response): chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished if finished: break complete_response += chunk + if has_finish_reason is False: + raise Exception("Finish reason not in final chunk") if complete_response.strip() == "": raise Exception("Empty response received") print(f"completion_response: {complete_response}") except Exception as e: pytest.fail(f"Error occurred: {e}") +# test_completion_cohere_stream() + +def test_completion_cohere_stream_bad_key(): + try: + api_key = "bad-key" + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "how does a court case get to the Supreme Court?", + }, + ] + response = completion( + model="command-nightly", messages=messages, stream=True, max_tokens=50, api_key=api_key + ) + complete_response = "" + # Add any assertions here to check the response + has_finish_reason = False + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + if finished: + break + complete_response += chunk + if has_finish_reason is False: + raise Exception("Finish reason not in final chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except AuthenticationError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +# test_completion_cohere_stream_bad_key() + +# def test_completion_nlp_cloud(): +# try: +# messages = [ +# {"role": "system", "content": "You are a helpful assistant."}, +# { +# "role": "user", +# "content": "how does a court case get to the Supreme Court?", +# }, +# ] +# response = completion(model="dolphin", messages=messages, stream=True) +# complete_response = "" +# # Add any assertions here to check the response +# has_finish_reason = False +# for idx, chunk in enumerate(response): +# chunk, finished = streaming_format_tests(idx, chunk) +# has_finish_reason = finished +# complete_response += chunk +# if finished: +# break +# if has_finish_reason is False: +# raise Exception("Finish reason not in final chunk") +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# print(f"completion_response: {complete_response}") +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_nlp_cloud() + +# def test_completion_nlp_cloud_bad_key(): +# try: +# api_key = "bad-key" +# messages = [ 
+# {"role": "system", "content": "You are a helpful assistant."}, +# { +# "role": "user", +# "content": "how does a court case get to the Supreme Court?", +# }, +# ] +# response = completion(model="dolphin", messages=messages, stream=True, api_key=api_key) +# complete_response = "" +# # Add any assertions here to check the response +# has_finish_reason = False +# for idx, chunk in enumerate(response): +# chunk, finished = streaming_format_tests(idx, chunk) +# has_finish_reason = finished +# complete_response += chunk +# if finished: +# break +# if has_finish_reason is False: +# raise Exception("Finish reason not in final chunk") +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# print(f"completion_response: {complete_response}") +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_nlp_cloud_bad_key() + # def test_completion_hf_stream(): # try: # messages = [ @@ -235,10 +335,41 @@ def test_completion_cohere_stream(): # if complete_response.strip() == "": # raise Exception("Empty response received") # print(f"completion_response: {complete_response}") +# except InvalidRequestError as e: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# # test_completion_hf_stream() + +# def test_completion_hf_stream_bad_key(): +# try: +# api_key = "bad-key" +# messages = [ +# { +# "content": "Hello! How are you today?", +# "role": "user" +# }, +# ] +# response = completion( +# model="huggingface/meta-llama/Llama-2-7b-chat-hf", messages=messages, api_base="https://a8l9e3ucxinyl3oj.us-east-1.aws.endpoints.huggingface.cloud", stream=True, max_tokens=1000, api_key=api_key +# ) +# complete_response = "" +# # Add any assertions here to check the response +# for idx, chunk in enumerate(response): +# chunk, finished = streaming_format_tests(idx, chunk) +# if finished: +# break +# complete_response += chunk +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# print(f"completion_response: {complete_response}") +# except InvalidRequestError as e: +# pass # except Exception as e: # pytest.fail(f"Error occurred: {e}") -# test_completion_hf_stream() +# test_completion_hf_stream_bad_key() def test_completion_claude_stream(): try: @@ -266,9 +397,202 @@ def test_completion_claude_stream(): pytest.fail(f"Error occurred: {e}") # test_completion_claude_stream() + +def test_completion_claude_stream_bad_key(): + try: + api_key = "bad-key" + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "how does a court case get to the Supreme Court?", + }, + ] + response = completion( + model="claude-instant-1", messages=messages, stream=True, max_tokens=50, api_key=api_key + ) + complete_response = "" + # Add any assertions here to check the response + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +# test_completion_claude_stream_bad_key() + +def test_completion_replicate_stream(): + try: + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "how does a court case get to the Supreme Court?", + }, + ] + response = completion( + 
model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50 + ) + complete_response = "" + has_finish_reason = False + # Add any assertions here to check the response + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + if finished: + break + complete_response += chunk + if has_finish_reason is False: + raise Exception("finish reason not set for last chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except InvalidRequestError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") +# test_completion_replicate_stream() + +# def test_completion_vertexai_stream(): +# try: +# import os +# os.environ["VERTEXAI_PROJECT"] = "pathrise-convert-1606954137718" +# os.environ["VERTEXAI_LOCATION"] = "us-central1" +# messages = [ +# {"role": "system", "content": "You are a helpful assistant."}, +# { +# "role": "user", +# "content": "how does a court case get to the Supreme Court?", +# }, +# ] +# response = completion( +# model="vertex_ai/chat-bison", messages=messages, stream=True, max_tokens=50 +# ) +# complete_response = "" +# has_finish_reason = False +# # Add any assertions here to check the response +# for idx, chunk in enumerate(response): +# chunk, finished = streaming_format_tests(idx, chunk) +# has_finish_reason = finished +# if finished: +# break +# complete_response += chunk +# if has_finish_reason is False: +# raise Exception("finish reason not set for last chunk") +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# print(f"completion_response: {complete_response}") +# except InvalidRequestError as e: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_vertexai_stream() + + +# def test_completion_vertexai_stream_bad_key(): +# try: +# import os +# messages = [ +# {"role": "system", "content": "You are a helpful assistant."}, +# { +# "role": "user", +# "content": "how does a court case get to the Supreme Court?", +# }, +# ] +# response = completion( +# model="vertex_ai/chat-bison", messages=messages, stream=True, max_tokens=50 +# ) +# complete_response = "" +# has_finish_reason = False +# # Add any assertions here to check the response +# for idx, chunk in enumerate(response): +# chunk, finished = streaming_format_tests(idx, chunk) +# has_finish_reason = finished +# if finished: +# break +# complete_response += chunk +# if has_finish_reason is False: +# raise Exception("finish reason not set for last chunk") +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# print(f"completion_response: {complete_response}") +# except InvalidRequestError as e: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_vertexai_stream_bad_key() + +def test_completion_replicate_stream(): + try: + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "how does a court case get to the Supreme Court?", + }, + ] + response = completion( + model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50 + ) + complete_response = "" + has_finish_reason = False + # Add any assertions here to check the response + for idx, chunk in enumerate(response): 
+ chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + if finished: + break + complete_response += chunk + if has_finish_reason is False: + raise Exception("finish reason not set for last chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except InvalidRequestError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +def test_completion_replicate_stream_bad_key(): + try: + api_key = "bad-key" + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "how does a court case get to the Supreme Court?", + }, + ] + response = completion( + model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50, api_key=api_key + ) + complete_response = "" + # Add any assertions here to check the response + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except InvalidRequestError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +# test_completion_replicate_stream_bad_key() + def test_completion_bedrock_ai21_stream(): try: - litellm.set_verbose = False response = completion( model="bedrock/amazon.titan-tg1-large", messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}], @@ -276,21 +600,83 @@ def test_completion_bedrock_ai21_stream(): max_tokens=4096, stream=True, ) - complete_response = "" - # Add any assertions here to check the response - print(response) + complete_response = "" + has_finish_reason = False + # Add any assertions here to check the response + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + complete_response += chunk + if finished: + break + if has_finish_reason is False: + raise Exception("finish reason not set for last chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +# test_completion_bedrock_ai21_stream() + +def test_completion_bedrock_ai21_stream_bad_key(): + try: + response = completion( + model="bedrock/amazon.titan-tg1-large", + messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}], + temperature=1, + max_tokens=4096, + stream=True, + ) + complete_response = "" + has_finish_reason = False + # Add any assertions here to check the response for idx, chunk in enumerate(response): chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished if finished: break complete_response += chunk + if has_finish_reason is False: + raise Exception("finish reason not set for last chunk") if complete_response.strip() == "": raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except InvalidRequestError as e: + pass except Exception as e: pytest.fail(f"Error occurred: {e}") +# test_completion_bedrock_ai21_stream_bad_key() -# 
test_completion_cohere_stream() +def test_completion_sagemaker_stream(): + try: + response = completion( + model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b", + messages=messages, + temperature=0.2, + max_tokens=80, + stream=True, + ) + complete_response = "" + has_finish_reason = False + # Add any assertions here to check the response + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + if finished: + break + complete_response += chunk + if has_finish_reason is False: + raise Exception("finish reason not set for last chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + except InvalidRequestError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +test_completion_sagemaker_stream() # test on openai completion call def test_openai_text_completion_call(): @@ -314,16 +700,20 @@ def test_openai_text_completion_call(): def ai21_completion_call(): try: response = completion( - model="j2-ultra", messages=messages, stream=True, logger_fn=logger_fn + model="j2-ultra", messages=messages, stream=True ) print(f"response: {response}") + has_finished = False complete_response = "" start_time = time.time() for idx, chunk in enumerate(response): chunk, finished = streaming_format_tests(idx, chunk) + has_finished = finished + complete_response += chunk if finished: break - complete_response += chunk + if has_finished is False: + raise Exception("finished reason missing from final chunk") if complete_response.strip() == "": raise Exception("Empty response received") print(f"completion_response: {complete_response}") @@ -331,6 +721,82 @@ def ai21_completion_call(): pytest.fail(f"error occurred: {traceback.format_exc()}") # ai21_completion_call() + +def ai21_completion_call_bad_key(): + try: + api_key = "bad-key" + response = completion( + model="j2-ultra", messages=messages, stream=True, api_key=api_key + ) + print(f"response: {response}") + complete_response = "" + start_time = time.time() + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"completion_response: {complete_response}") + except InvalidRequestError as e: + pass + except: + pytest.fail(f"error occurred: {traceback.format_exc()}") + +# ai21_completion_call_bad_key() + +def test_completion_aleph_alpha(): + try: + response = completion( + model="luminous-base", messages=messages, stream=True + ) + # Add any assertions here to check the response + has_finished = False + complete_response = "" + start_time = time.time() + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finished = finished + complete_response += chunk + if finished: + break + if has_finished is False: + raise Exception("finished reason missing from final chunk") + if complete_response.strip() == "": + raise Exception("Empty response received") + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +# test_completion_aleph_alpha() + +# def test_completion_aleph_alpha_bad_key(): +# try: +# api_key = "bad-key" +# response = completion( +# model="luminous-base", messages=messages, stream=True, api_key=api_key +# ) +# # Add any assertions here to check the response +# has_finished = False +# complete_response = "" +# start_time = time.time() +# for idx, chunk in enumerate(response): +# 
chunk, finished = streaming_format_tests(idx, chunk) +# has_finished = finished +# complete_response += chunk +# if finished: +# break +# if has_finished is False: +# raise Exception("finished reason missing from final chunk") +# if complete_response.strip() == "": +# raise Exception("Empty response received") +# except InvalidRequestError as e: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_aleph_alpha_bad_key() + # test on openai completion call def test_openai_chat_completion_call(): try: @@ -366,11 +832,15 @@ def test_together_ai_completion_call_starcoder(): ) complete_response = "" print(f"returned response object: {response}") + has_finish_reason = False for idx, chunk in enumerate(response): chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished if finished: break complete_response += chunk + if has_finish_reason is False: + raise Exception("Finish reason not set for last chunk") if complete_response == "": raise Exception("Empty response received") print(f"complete response: {complete_response}") @@ -378,6 +848,38 @@ def test_together_ai_completion_call_starcoder(): print(f"error occurred: {traceback.format_exc()}") pass +# test_together_ai_completion_call_starcoder() + +def test_together_ai_completion_call_starcoder_bad_key(): + try: + api_key = "bad-key" + start_time = time.time() + response = completion( + model="together_ai/bigcode/starcoder", + messages=messages, + stream=True, + api_key=api_key + ) + complete_response = "" + has_finish_reason = False + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + has_finish_reason = finished + if finished: + break + complete_response += chunk + if has_finish_reason is False: + raise Exception("Finish reason not set for last chunk") + if complete_response == "": + raise Exception("Empty response received") + print(f"complete response: {complete_response}") + except InvalidRequestError as e: + pass + except: + print(f"error occurred: {traceback.format_exc()}") + pass + +# test_together_ai_completion_call_starcoder_bad_key() #### Test Function calling + streaming #### def test_completion_openai_with_functions(): diff --git a/litellm/utils.py b/litellm/utils.py index 0d2ce8c9587c..046c82cf192a 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2,6 +2,7 @@ import dotenv, json, traceback, threading import subprocess, os import litellm, openai +import itertools import random, uuid, requests import datetime, time import tiktoken @@ -1915,7 +1916,6 @@ def exception_type( ): global user_logger_fn, liteDebuggerClient exception_mapping_worked = False - if litellm.set_verbose == True: litellm.error_logs['EXCEPTION'] = original_exception litellm.error_logs['KWARGS'] = completion_kwargs @@ -1970,7 +1970,7 @@ def exception_type( exception_type = type(original_exception).__name__ else: exception_type = "" - if "claude" in model: # one of the anthropics + if custom_llm_provider == "anthropic": # one of the anthropics if hasattr(original_exception, "message"): if "prompt is too long" in original_exception.message: exception_mapping_worked = True @@ -1979,6 +1979,13 @@ def exception_type( model=model, llm_provider="anthropic" ) + if "Invalid API Key" in original_exception.message: + exception_mapping_worked = True + raise AuthenticationError( + message=original_exception.message, + model=model, + llm_provider="anthropic" + ) if hasattr(original_exception, "status_code"): print_verbose(f"status_code: 
{original_exception.status_code}") if original_exception.status_code == 401: @@ -2031,7 +2038,7 @@ def exception_type( llm_provider="anthropic", model=model ) - elif "replicate" in model: + elif custom_llm_provider == "replicate": if "Incorrect authentication token" in error_str: exception_mapping_worked = True raise AuthenticationError( @@ -2068,7 +2075,7 @@ def exception_type( llm_provider="replicate", model=model ) - elif original_exception.status_code == 400: + elif original_exception.status_code == 400 or original_exception.status_code == 422: exception_mapping_worked = True raise InvalidRequestError( message=f"ReplicateException - {original_exception.message}", @@ -2110,7 +2117,31 @@ def exception_type( llm_provider="replicate", model=model ) - elif model in litellm.cohere_models or custom_llm_provider == "cohere": # Cohere + elif custom_llm_provider == "bedrock": + if "Unable to locate credentials" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"BedrockException - {error_str}", + model=model, + llm_provider="bedrock" + ) + elif custom_llm_provider == "sagemaker": + if "Unable to locate credentials" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"SagemakerException - {error_str}", + model=model, + llm_provider="sagemaker" + ) + elif custom_llm_provider == "vertex_ai": + if "Vertex AI API has not been used in project" in error_str or "Unable to find your project" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai" + ) + elif custom_llm_provider == "cohere": # Cohere if ( "invalid api token" in error_str or "No API key provided." in error_str @@ -2184,6 +2215,13 @@ def exception_type( model=model, llm_provider="huggingface" ) + elif "A valid user token is required" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=error_str, + llm_provider="huggingface", + model=model + ) if hasattr(original_exception, "status_code"): if original_exception.status_code == 401: exception_mapping_worked = True @@ -2221,6 +2259,8 @@ def exception_type( llm_provider="huggingface", model=model ) + exception_mapping_worked = True + raise APIError(status_code=500, message=error_str, model=model, llm_provider=custom_llm_provider) elif custom_llm_provider == "ai21": if hasattr(original_exception, "message"): if "Prompt has too many tokens" in original_exception.message: @@ -2230,6 +2270,13 @@ def exception_type( model=model, llm_provider="ai21" ) + if "Bad or missing API token." 
in original_exception.message: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"AI21Exception - {original_exception.message}", + model=model, + llm_provider="ai21" + ) if hasattr(original_exception, "status_code"): if original_exception.status_code == 401: exception_mapping_worked = True @@ -2266,7 +2313,7 @@ def exception_type( llm_provider="ai21", model=model ) - elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud": + elif custom_llm_provider == "nlp_cloud": if "detail" in error_str: if "Input text length should not exceed" in error_str: exception_mapping_worked = True @@ -2342,6 +2389,7 @@ def exception_type( model=model ) elif custom_llm_provider == "together_ai": + import json error_response = json.loads(error_str) if "error" in error_response and "`inputs` tokens + `max_new_tokens` must be <=" in error_response["error"]: exception_mapping_worked = True @@ -2364,6 +2412,13 @@ def exception_type( model=model, llm_provider="together_ai" ) + elif "error" in error_response and "API key doesn't match expected format." in error_response["error"]: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"TogetherAIException - {error_response['error']}", + model=model, + llm_provider="together_ai" + ) elif "error_type" in error_response and error_response["error_type"] == "validation": exception_mapping_worked = True raise InvalidRequestError( @@ -2393,7 +2448,7 @@ def exception_type( llm_provider="together_ai", model=model ) - elif model in litellm.aleph_alpha_models: + elif custom_llm_provider == "aleph_alpha": if "This is longer than the model's maximum context length" in error_str: exception_mapping_worked = True raise ContextWindowExceededError( @@ -2401,6 +2456,13 @@ def exception_type( llm_provider="aleph_alpha", model=model ) + elif "InvalidToken" in error_str or "No token provided" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"AlephAlphaException - {original_exception.message}", + llm_provider="aleph_alpha", + model=model + ) elif hasattr(original_exception, "status_code"): print(f"status code: {original_exception.status_code}") if original_exception.status_code == 401: @@ -2445,7 +2507,8 @@ def exception_type( elif custom_llm_provider == "ollama": if "no attribute 'async_get_ollama_response_stream" in error_str: raise ImportError("Import error - trying to use async for ollama. import async_generator failed. 
Try 'pip install async_generator'") - raise original_exception + exception_mapping_worked = True + raise APIError(status_code=500, message=str(original_exception), llm_provider=custom_llm_provider, model=model) except Exception as e: # LOGGING exception_logging( @@ -2563,6 +2626,7 @@ def __init__(self, completion_stream, model, custom_llm_provider=None, logging_o self.logging_obj = logging_obj self.completion_stream = completion_stream self.sent_first_chunk = False + self.sent_last_chunk = False if self.logging_obj: # Log the type of the received item self.logging_obj.post_call(str(type(completion_stream))) @@ -2579,41 +2643,71 @@ def logging(self, text): def handle_anthropic_chunk(self, chunk): str_line = chunk.decode("utf-8") # Convert bytes to string + print(f"str_line: {str_line}") + text = "" + is_finished = False + finish_reason = None if str_line.startswith("data:"): data_json = json.loads(str_line[5:]) - return data_json.get("completion", "") - return "" + text = data_json.get("completion", "") + if data_json.get("stop_reason", None): + is_finished = True + finish_reason = data_json["stop_reason"] + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} + elif "error" in str_line: + raise ValueError(f"Unable to parse response. Original response: {str_line}") + else: + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} def handle_together_ai_chunk(self, chunk): chunk = chunk.decode("utf-8") - text_index = chunk.find('"text":"') # this checks if text: exists - text_start = text_index + len('"text":"') - text_end = chunk.find('"}', text_start) - if text_index != -1 and text_end != -1: - extracted_text = chunk[text_start:text_end] - return extracted_text + text = "" + is_finished = False + finish_reason = None + if "text" in chunk: + text_index = chunk.find('"text":"') # this checks if text: exists + text_start = text_index + len('"text":"') + text_end = chunk.find('"}', text_start) + if text_index != -1 and text_end != -1: + extracted_text = chunk[text_start:text_end] + text = extracted_text + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} + elif "[DONE]" in chunk: + return {"text": text, "is_finished": True, "finish_reason": "stop"} + elif "error" in chunk: + raise ValueError(chunk) else: - return "" + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} def handle_huggingface_chunk(self, chunk): chunk = chunk.decode("utf-8") + text = "" + is_finished = False + finish_reason = "" if chunk.startswith("data:"): data_json = json.loads(chunk[5:]) + print(f"data json: {data_json}") if "token" in data_json and "text" in data_json["token"]: text = data_json["token"]["text"] if "meta-llama/Llama-2" in self.model: #clean eos tokens like from the returned output text if any(token in text for token in llama_2_special_tokens): text = text.replace("", "").replace("", "") - return text - else: - return "" - return "" + if data_json.get("details", False) and data_json["details"].get("finish_reason", False): + is_finished = True + finish_reason = data_json["details"]["finish_reason"] + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} + elif "error" in chunk: + raise ValueError(chunk) + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} - def handle_ai21_chunk(self, chunk): + def handle_ai21_chunk(self, chunk): # fake streaming chunk = chunk.decode("utf-8") data_json = json.loads(chunk) try: - return 
data_json["completions"][0]["data"]["text"] + text = data_json["completions"][0]["data"]["text"] + is_finished = True + finish_reason = "stop" + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} except: raise ValueError(f"Unable to parse response. Original response: {chunk}") @@ -2621,8 +2715,10 @@ def handle_nlp_cloud_chunk(self, chunk): chunk = chunk.decode("utf-8") data_json = json.loads(chunk) try: - print(f"data json: {data_json}") - return data_json["generated_text"] + text = data_json["generated_text"] + is_finished = True + finish_reason = "stop" + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} except: raise ValueError(f"Unable to parse response. Original response: {chunk}") @@ -2630,7 +2726,10 @@ def handle_aleph_alpha_chunk(self, chunk): chunk = chunk.decode("utf-8") data_json = json.loads(chunk) try: - return data_json["completions"][0]["completion"] + text = data_json["completions"][0]["completion"] + is_finished = True + finish_reason = "stop" + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} except: raise ValueError(f"Unable to parse response. Original response: {chunk}") @@ -2638,7 +2737,35 @@ def handle_cohere_chunk(self, chunk): chunk = chunk.decode("utf-8") data_json = json.loads(chunk) try: - return data_json["text"] + text = "" + is_finished = False + finish_reason = "" + if "text" in data_json: + text = data_json["text"] + elif "is_finished" in data_json: + is_finished = data_json["is_finished"] + finish_reason = data_json["finish_reason"] + else: + raise Exception(data_json) + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} + except: + raise ValueError(f"Unable to parse response. Original response: {chunk}") + + def handle_replicate_chunk(self, chunk): + print(f"chunk: {chunk}") + try: + text = "" + is_finished = False + finish_reason = "" + if "output" in chunk: + text = chunk['output'] + if "status" in chunk: + if chunk["status"] == "succeeded": + is_finished = True + finish_reason = "stop" + elif chunk.get("error", None): + raise Exception(chunk["error"]) + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} except: raise ValueError(f"Unable to parse response. 
Original response: {chunk}") @@ -2683,13 +2810,21 @@ def handle_baseten_chunk(self, chunk): traceback.print_exc() return "" - def handle_bedrock_stream(self): - if self.completion_stream: - event = next(self.completion_stream) - chunk = event.get('chunk') - if chunk: - chunk_data = json.loads(chunk.get('bytes').decode()) - return chunk_data['outputText'] + def handle_bedrock_stream(self, chunk): + chunk = chunk.get('chunk') + if chunk: + chunk_data = json.loads(chunk.get('bytes').decode()) + text = "" + is_finished = False + finish_reason = "" + if "outputText" in chunk_data: + text = chunk_data['outputText'] + if chunk_data.get("completionReason", None): + is_finished = True + finish_reason = chunk_data["completionReason"] + elif chunk.get("error", None): + raise Exception(chunk["error"]) + return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason} return "" ## needs to handle the empty string case (even starting chunk can be an empty string) @@ -2701,49 +2836,94 @@ def __next__(self): completion_obj = {"content": ""} if self.custom_llm_provider and self.custom_llm_provider == "anthropic": chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_anthropic_chunk(chunk) + response_obj = self.handle_anthropic_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.model == "replicate" or self.custom_llm_provider == "replicate": chunk = next(self.completion_stream) - completion_obj["content"] = chunk + response_obj = self.handle_replicate_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif ( self.custom_llm_provider and self.custom_llm_provider == "together_ai"): chunk = next(self.completion_stream) - text_data = self.handle_together_ai_chunk(chunk) - if text_data == "": - return self.__next__() - completion_obj["content"] = text_data + response_obj = self.handle_together_ai_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider and self.custom_llm_provider == "huggingface": chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_huggingface_chunk(chunk) + response_obj = self.handle_huggingface_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider and self.custom_llm_provider == "baseten": # baseten doesn't provide streaming chunk = next(self.completion_stream) completion_obj["content"] = self.handle_baseten_chunk(chunk) elif self.custom_llm_provider and self.custom_llm_provider == "ai21": #ai21 doesn't provide streaming chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_ai21_chunk(chunk) + response_obj = self.handle_ai21_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider and self.custom_llm_provider == "vllm": chunk = next(self.completion_stream) completion_obj["content"] = chunk[0].outputs[0].text - elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming 
+ elif self.custom_llm_provider and self.custom_llm_provider == "aleph_alpha": #aleph alpha doesn't provide streaming chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk) + response_obj = self.handle_aleph_alpha_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai": chunk = next(self.completion_stream) completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk) elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud": - chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_nlp_cloud_chunk(chunk) - elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models): - chunk = next(self.completion_stream) - completion_obj["content"] = str(chunk) + try: + chunk = next(self.completion_stream) + response_obj = self.handle_nlp_cloud_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] + except Exception as e: + if self.sent_last_chunk: + raise e + else: + if self.sent_first_chunk is False: + raise Exception("An unknown error occurred with the stream") + model_response.choices[0].finish_reason = "stop" + self.sent_last_chunk = True + elif self.custom_llm_provider and self.custom_llm_provider == "vertex_ai": + try: + chunk = next(self.completion_stream) + completion_obj["content"] = str(chunk) + except StopIteration as e: + if self.sent_last_chunk: + raise e + else: + model_response.choices[0].finish_reason = "stop" + self.sent_last_chunk = True elif self.custom_llm_provider == "cohere": chunk = next(self.completion_stream) - completion_obj["content"] = self.handle_cohere_chunk(chunk) + response_obj = self.handle_cohere_chunk(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider == "bedrock": - completion_obj["content"] = self.handle_bedrock_stream() + chunk = next(self.completion_stream) + response_obj = self.handle_bedrock_stream(chunk) + completion_obj["content"] = response_obj["text"] + if response_obj["is_finished"]: + model_response.choices[0].finish_reason = response_obj["finish_reason"] elif self.custom_llm_provider == "sagemaker": if len(self.completion_stream)==0: - raise StopIteration + if self.sent_last_chunk: + raise StopIteration + else: + model_response.choices[0].finish_reason = "stop" + self.sent_last_chunk = True chunk_size = 30 new_chunk = self.completion_stream[:chunk_size] completion_obj["content"] = new_chunk @@ -2765,11 +2945,13 @@ def __next__(self): self.sent_first_chunk = True model_response.choices[0].delta = Delta(**completion_obj) return model_response + elif model_response.choices[0].finish_reason: + return model_response except StopIteration: raise StopIteration - except Exception as e: - model_response.choices[0].finish_reason = "stop" - return model_response + except Exception as e: + e.message = str(e) + return exception_type(model=self.model, custom_llm_provider=self.custom_llm_provider, original_exception=e) async def __anext__(self): try: @@ -2796,7 +2978,6 @@ def read_config_args(config_path) -> dict: # read 
keys/ values from config file and return them return config except Exception as e: - print("An error occurred while reading config:", str(e)) raise e ########## experimental completion variants ############################ @@ -2899,7 +3080,6 @@ def get_model_split_test(models, completion_call_id): try: # make the api call last_fetched_at = time.time() - print(f"last_fetched_at: {last_fetched_at}") response = requests.post( #http://api.litellm.ai url="http://api.litellm.ai/get_model_split_test", # get the updated dict from table or update the table with the dict diff --git a/pyproject.toml b/pyproject.toml index bf0f0097c76d..80b3eb99d1f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.738" +version = "0.1.739" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License"
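
Usage sketch (illustrative only, not part of the patch): after this change, the final streamed chunk carries a finish_reason and auth failures surface as litellm's mapped exception types, so a caller can drive the loop the same way the tests above do. The model name and message mirror the tests in this patch; a valid provider API key is assumed to be configured in the environment.

from litellm import completion, AuthenticationError

messages = [{"role": "user", "content": "how does a court case get to the Supreme Court?"}]

try:
    response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50)
    complete_response = ""
    for chunk in response:
        # chunks are OpenAI-shaped; with this patch the last chunk sets finish_reason
        if chunk["choices"][0]["finish_reason"]:
            break
        if "content" in chunk["choices"][0]["delta"]:
            complete_response += chunk["choices"][0]["delta"]["content"]
    print(complete_response)
except AuthenticationError:
    # a bad key is now raised as a mapped litellm exception instead of a raw provider error
    pass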