From 95294d987c7d9d2c08d0b763c683368bf53024c4 Mon Sep 17 00:00:00 2001 From: Kirk Wang Date: Tue, 28 Jan 2025 09:22:39 -0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20rerun=20for=20entries=20th?= =?UTF-8?q?at=20came=20from=20zips?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, when rerunning an entry that came from a zip file, the rerun would fail because it would look for the CSV in an assumed location which is based on its last importer ID. Since this was a rerun, it does not do an unzip into the assumed location so the directory does not exist. This commit will first check if the assumed location exists, and if not, it will look for the location of the last unzipped files and use that for the rerun. This does cause an interesting behavior where if the entry is a work with a file attached, it will add the file again resulting in duplicate files. I feel this is such an edge case though because typically if the entry is successful, the user will not rerun it. I added a hint text to the importer to let the user know this is a possibility. 
Ref: - https://github.com/notch8/palni_palci_knapsack/issues/210 --- app/models/bulkrax/importer.rb | 8 ++++++++ app/parsers/bulkrax/csv_parser.rb | 4 ++++ spec/fixtures/csv/files/moon.jpg | Bin 0 -> 4264 bytes 3 files changed, 12 insertions(+) create mode 100644 spec/fixtures/csv/files/moon.jpg diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb index 3e5d01967..166f375e1 100644 --- a/app/models/bulkrax/importer.rb +++ b/app/models/bulkrax/importer.rb @@ -239,6 +239,14 @@ def import_metadata_format # If the import data is zipped, unzip it to this path def importer_unzip_path @importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}") + return @importer_unzip_path if Dir.exist?(@importer_unzip_path) + + # turns "tmp/imports/tenant/import_1_20250122035229_1" to "tmp/imports/tenant/import_1_20250122035229" + base_importer_unzip_path = @importer_unzip_path.split('_')[0...-1].join('_') + + # If we don't have an existing unzip path, we'll try and find it. + # Just in case there are multiple paths, we sort by the number at the end of the path and get the last one + @importer_unzip_path = Dir.glob(base_importer_unzip_path + '*').sort_by { |path| path.split(base_importer_unzip_path).last[1..-1].to_i }.last end def errored_entries_csv_path diff --git a/app/parsers/bulkrax/csv_parser.rb b/app/parsers/bulkrax/csv_parser.rb index e1a7c2fba..bd7dc41a8 100644 --- a/app/parsers/bulkrax/csv_parser.rb +++ b/app/parsers/bulkrax/csv_parser.rb @@ -360,6 +360,10 @@ def path_to_files(**args) @path_to_files = File.join( zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename ) + + return @path_to_files if File.exist?(@path_to_files) + + File.join(importer_unzip_path, 'files', filename) if file? && zip? 
end private diff --git a/spec/fixtures/csv/files/moon.jpg b/spec/fixtures/csv/files/moon.jpg new file mode 100644 index 0000000000000000000000000000000000000000..136fed4c8ef0d67708fe101eca1d7506c258f2fa GIT binary patch literal 4264 zcmbVOXIK;4)}Bx#6h)ArAjLzGP^BX(L`o`P>Tm=2R|7ZWfJpBux z2XL~pv;Dl-*%{`+;AH3IFy;Xk9#-C8lynXqLpZaE`SB@7 zCFX#|b<64@$j{#;R9yU{**W-+pAZm~JS_#4mQhtx*Ep+rPEQ|hU}$7~$=m{EX=QC= z>w4YI-NVz%J0LJ9IOKL{Sj^qnxcGbbF;AW*C8wmOrDx>g^6)S73kr+MD=LXq)vw>w zG&D9fe`xvGO78CI?dzuu3=WM?OioSD%+AeIS5|+lt#52@ZSNfL0s+kbru8SYfAHdA z@M2o3r0BG36*QpKfAKSv|YB3i3O|#s4!0zl7?z zXp3t0(CI%f_7RJjL--jny9PD<|7M{E6f0)W%v}BiLq}$96xd3ZFl64Gukmp3ck` zrvdAGD)#=@dfG2iB)OJ(jE5ptf6xKtg=^3@-=ODx5V9>bBQ~95suMhUBYu{7KXqYm zA*ZsglH|};{fVvf2JDk0DtZ#0n5bhtcz1@3fWu!N3hYn;;+o_(3ef(7ft2uP11s{xR z_pWks&9v87Vp;TOGn%3rcCABJ(?2-o(0Yu+N-2AtKCJhHj+Z^7;Pwx-d9Un$1;c{k zWybVSSCCB$a#vc#G6~Z45Q6i$kMH!;WFltJsaxi+$y z9XN*u`=@juUb2VimRUmT=bg;0)=%BzuBHP;#D#>cu|nZ+7h+|})jYB8`MTfEV)W>M zJV4{$q@g?K{fozjh8~5K8jexgr<$E1q!RfWoHqtq`trhyTnrTW3rY-+xL^Qi?q85O zNw~fAB7<#S>@oMIwg1WQg$0F4788^5X_Hd?8E3b+5{*`1y!z88!jvZrvzg(gVl7^H0C>( z?V{r3kTm&@csT{lhZa?NDxhPYdLh_#E*G<5@^4>YJzp?kG;tXK6qliSVE*5nQVtr} zw3Q%@r=Nr^R(~|4%Bl}y`LH+^%h7_`%H8pK9`x?lI9hU)X9U8DyGe2SRbCBlutEEV zR<=PPv_9Ex_q|{=Rf{YXV1D$((XgXBdp>WPUB0(!6IK{%pDk@w zVVY3XA=E%xoBB;c6OqNACLEhTMzGDd8ZJK>h6pnb#Fyx-WdLEd6)A$d)2?gkuM|5C6ykn>Em4DAt ziLdu^g=sAv@Iorl0j68TcyDcl6Ql|0*e~0*uJurmcxbQVfy(Mft@^!Y+oF^^l5`-F z4%k*ZX!^Vp6oqwU21Tfw)z}-EZbrho%N2TL`l@fk`m#6USBq<-lGT2s4eVaH@?;rZ zXWrG&<{DsU4F;(FA#Rj=aeg&D?jnDWTw=hu4Y59HH0o zQW9$(esGU@jh7p}=M!gey{wO4j)Ehpz>pB%mRC_8r zvfo4p!gnmMPfn}hgCne4cJ*nx74Ew)--WB7znM-2J69Q;Pk8>W5?MFqTLOt%%FppI zAnghd@CxZ^i$B%kqXV1~qW!h!#@A(?Z-%OhFGUGB-7*thDfxB`aqNTqY2UG?x>_lk ztpTi{ltA`#{NT%Oh3lb|P0sK^tc&5=Lv$dW2M-)hVoJLOZO6U^ zy!?9Iw{dTs@Wfb1>20~9#YLO@bHFQP=v1%5m3Hu>2x~Jsz!AQi#RwAbRYsX^==MG{ z8jemMiOKoq)^R2lZOPj&2eO6cB_oET5C%~f2`LM$0bin=KnBlBQ2NUy$W6DL_duYK zd0{1SgU2HcRjYYjBr^L`#Vkyk5THIUw86&weWGH?4xqHUczpBYO--M7o-9`$-s@P; 
ztu9tASdu@LzU8G+t^I6(Lv}Df$tshNf(j_N-ZA_2%!@Ie`up+<7srztW`}=k+egbZ zJIu98+(323SvrEnNx|omONSO`?{U2rx$d-_e~P=*^urtQHYBv~xFOFs{%*>QM{3_} z4td8jNYK4dc;X(u!2voQ#tzUVXt|XO@k)>Nq-DrD$b1%_VIDhm+d+piC+4||g3kR7 z+;#$u6lN0YLy12`5c{AqyLZ{&p9T1OrXpWdnL`?<-Iw!lGidhqT?qu|Q-7v9g z9HRv1d;drFNi&>ov}#n(V_=i^m7^r2V8|v~xcYKJio|p$rTfk9g9jv?`lN67O^}i> z=%)6|x0%6Sm%$mfZPnY)D;1v*mM(KOa?m~r- z&0u>!^QZx<_bz^FN-6=*YCZC$4V0$h?jJ$P>0;M|E}8*zGSHrDv&$lN<46i28n=k+ zkRNPBS_xlG+z{7@h@pCU7cal5!akrZI9O*ImgNnNh`r8nN0B2jrIxA`%do(q{NsqVb4D)K&T)`=KCcs- zLrQRCctq|KOu2Sw)yP1AeHOv{<{8d{CJl*?X>+1msXH{aew5v=KB@Y)9f-Deb)O>4 zy{0znZf#nE@UtsHP-03Ky&V=eHPgN~WOW@9mI1djPNqxv>yI!sSuZek0xVXuDkewT zH>euN15x)zhLn7~98_AFlYNV_WDSIaQ4J$@vwdGL(1EeTbfDRsL%LuHqD3e@oNm?V zWnvpV=M3|(+DcTz^{utSreEvL%apsWh>Mok*by{#Qx!FK%YUI6ys09Z58>2(<{dk< z#R}e?&EG<&fBGKi*zHMuxH0)uRgX%AmFVuoJr^CViNTVoEvxn!GqD*v=FPd-KK}{L zJ85U$US>7+J?(S()u4jDv|Ps*&N8&d>{{_QQOjgj=B|#wx8q~tBb~4mX~M5K;-xZZ zsR~c#gpX{1O{f>6(;8BpGw+vKZZ8zB*3^_(HJ*A(y=rIF&>IOdv5;R|FBKns))i1R z{W>z3DPNznb$5&$Yes^dyAyUIj6-CtS`#me#lJ+JzbmpI8z%A@?vQRf(RRA|{ccNs zHObulz3PmMu~dmgmKms&pXA{fG8fdH-&GoLhUG|pBx_)pLrx_T?z}q)3)d)cr-?S= zuY2*|41x_!i@u4i3+6U_9J0eP@3T-Y$6b-of11ds8`yK@#)yP=Mhff%kPnV1Wwkr1 z!`wu>(j9%*83dz=F$>4#qdkUZB_KdxvbPu_;YTz0`*_2{fW&t*Eo*!Uqs} zP~7NvRBM6F{i!T3d}LAgr$eEyWRl%b3uVM}Ap?Qsk%z6Ss-7XT+w6-k>13ELmA=^2 zDyXV@)9WTH+QyqbN;V|py23K3H%ner-gl0@MQD{uabIe`<1<1o&%fu5hcD6GD5XNG zp{^?tdJ~wlu2^W2oM(>3k%nwIWYbupIDws!AB-a2^hdwy;WTgN=&s*nC60?~oUBM# zpaaxmcK)T-z1lQ#_V}*_G-}^~xdc`^5tj`4TtkR7TyRuBFOA1+PA;|mSU*4KDtXnv z(;`4=m>KKsCzg>ht1EEEO`(xOPI{6Za|Hd|{Zi^i{Jt1F#Be3*4Qv@d@$Nh%#j3F` z>|BVQ$U_I2u|~AQ%Q(1#kETNXvUy2E*80q9B`;3GwnKqhEahbDqy>`$;cT-Mr#Z zV^BJfQvUUK*djK%)T-~RKeVBz$$K|&I}_#Cs$eTSqYTZB<%cFB_?%G&tlAja`6tFU zB6c@xf6v88--G7mM)LoyNjPX2I0EK}0_)Si;YOlEPC<<~rDN=$4;8B@8z!6G7LeNh z4FByz$fe9{_!D{i+`e*|_|thy_hRZcA1Q3oR&p`hdP81Z3UpvpjfA(h9zMODJKDJ0 zzO@(C%x6`WWPW#H$O~rvKvasu>Oom8pQ3K9Gv;hdp7-%>i>yk7lm9_sbY=)2_W5th KA4C9}KKL(Q`q;Gq literal 0 HcmV?d00001