From e092d14c8651519b060c876a8ee368120ae9a91f Mon Sep 17 00:00:00 2001 From: Ron Hildebrandt Date: Fri, 13 Dec 2024 11:00:27 +0100 Subject: [PATCH] adjustements to include .rod downloads and parsing other rod files --- src/download_rod_files/README.md | 34 ++++++++++++++++++ .../convert_all_rod_to_nxs.sh | 14 ++++++++ .../download_rods_script.sh | 6 ++++ .../config/config_file_rod.json | 4 +-- src/pynxtools_raman/reader.py | 32 +++++++++++------ tests/data/rod/example.nxs | Bin 68760 -> 68784 bytes tests/data/witec/example.nxs | Bin 78784 -> 78784 bytes 7 files changed, 77 insertions(+), 13 deletions(-) create mode 100644 src/download_rod_files/README.md create mode 100755 src/download_rod_files/convert_all_rod_to_nxs.sh create mode 100755 src/download_rod_files/download_rods_script.sh diff --git a/src/download_rod_files/README.md b/src/download_rod_files/README.md new file mode 100644 index 0000000..8f3d8e5 --- /dev/null +++ b/src/download_rod_files/README.md @@ -0,0 +1,34 @@ +# Downloading multiple .rod files + +## download_rods_script.sh + +Adjust the file `download_rods_script.sh` to the range of downloads you want. +Default start is `1` and default end is `3`. +Be careful: Do not trigger unnecessary amounts of downloads. + +Take a look [here](https://solsa.crystallography.net/rod/1000679.html), to get valid .rod IDs. For this example, the ID is `1000679`. 
+ +## Make the script executable + +`chmod +x download_rods_script.sh` + +## Execute the script + +`./download_rods_script.sh` + + +## Convert the downloaded .rod files + +via the pynxtools-raman command: + +`dataconverter /1000679.rod src/pynxtools_raman/config/config_file_rod.json --reader raman --nxdl NXraman --output rod_example_neuxs.nxs` + + +# Automated conversion of all .rod files to .nxs files + +## Make the script executable +`chmod +x convert_all_rod_to_nxs.sh` + +## Call the script +`./src/download_rod_files/convert_all_rod_to_nxs.sh` + diff --git a/src/download_rod_files/convert_all_rod_to_nxs.sh b/src/download_rod_files/convert_all_rod_to_nxs.sh new file mode 100755 index 0000000..ac2c41a --- /dev/null +++ b/src/download_rod_files/convert_all_rod_to_nxs.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Define the folder containing the .rod files +folder_path="./src/download_rod_files" + +# Loop over all .rod files in the folder +for file in "$folder_path"/*.rod; do + # Extract the base name (without extension) + base_name=$(basename "$file" .rod) + + # Execute the command with the base name + dataconverter "$file" src/pynxtools_raman/config/config_file_rod.json \ + --reader raman --nxdl NXraman --output "${base_name}.nxs" +done \ No newline at end of file diff --git a/src/download_rod_files/download_rods_script.sh b/src/download_rod_files/download_rods_script.sh new file mode 100755 index 0000000..939b45f --- /dev/null +++ b/src/download_rod_files/download_rods_script.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +#for X in {1..3}; do +for X in {1000004..1000005}; do + download_rod_file "$X" +done \ No newline at end of file diff --git a/src/pynxtools_raman/config/config_file_rod.json b/src/pynxtools_raman/config/config_file_rod.json index 62d4982..315223f 100644 --- a/src/pynxtools_raman/config/config_file_rod.json +++ b/src/pynxtools_raman/config/config_file_rod.json @@ -18,7 +18,7 @@ "/ENTRY[entry]/INSTRUMENT[instrument]/LENS_OPT[objective_lens]/numerical_aperture": 
"@data:_raman_measurement_device.microscope_numerical_aperture", "/ENTRY[entry]/INSTRUMENT[instrument]/MONOCHROMATOR[monochromator]/grating/period": "@data:_raman_measurement_device.diffraction_grating", "/ENTRY[entry]/INSTRUMENT[instrument]/MONOCHROMATOR[monochromator]/grating/period/@units": "lines/mm", - "/ENTRY[entry]/SAMPLE[sample]/name":"@data:_chemical_name_systematic", + "/ENTRY[entry]/SAMPLE[sample]/name":"@data:_chemical_name_mineral", "/ENTRY[entry]/SAMPLE[sample]/physical_form":"@data:_[local]_chemical_compound_state", "/ENTRY[entry]/SAMPLE[sample]/chemical_formula":"@data:_chemical_formula_structural", "/ENTRY[entry]/SAMPLE[sample]/ENVIRONMENT[medium]/sample_medium":"@data:_raman_measurement.environment", @@ -31,7 +31,7 @@ "/ENTRY[entry]/definition/@url": "Remove_this_if_pynxtools_issue_#469_is_solved", "/ENTRY[entry]/experiment_type": "Raman spectroscopy", "/ENTRY[entry]/raman_experiment_type": "other", - "/ENTRY[entry]/title": "@data:_chemical_name_mineral", + "/ENTRY[entry]/title": "@data:_cod_original_formula_sum", "/ENTRY[entry]/start_time": "@data:_raman_measurement.datetime_initiated", "/ENTRY[entry]/@default": "data", "/ENTRY[entry]/DATA[data]/@signal": "y_values", diff --git a/src/pynxtools_raman/reader.py b/src/pynxtools_raman/reader.py index 5021697..d71d854 100644 --- a/src/pynxtools_raman/reader.py +++ b/src/pynxtools_raman/reader.py @@ -90,6 +90,12 @@ def handle_rod_file(self, filepath) -> Dict[str, Any]: # get the key and value pairs from the rod file self.raman_data = rod.extract_keys_and_values_from_cif() + # replace the [ and ] to avoid confliucts in processing with pynxtools NXclass assignments + self.raman_data = { + key.replace("_[local]_", "_local_"): value + for key, value in self.raman_data.items() + } + self.missing_meta_data = copy.deepcopy(self.raman_data) # This changes all uppercase string elements to lowercase string elements for the given key, within a given key value pair @@ -101,16 +107,17 @@ def handle_rod_file(self, 
filepath) -> Dict[str, Any]: # transform the string into a datetime object time_key = "_raman_measurement.datetime_initiated" date_time_str = self.raman_data.get(time_key) - date_time_obj = datetime.datetime.strptime(date_time_str, "%Y-%m-%d") - # assume UTC for .rod data, as this is not specified in detail - tzinfo = datetime.timezone.utc - if isinstance(date_time_obj, datetime.datetime): - if tzinfo is not None: - # Apply the specified timezone to the datetime object - date_time_obj = date_time_obj.replace(tzinfo=tzinfo) - - # assign the dictionary the corrrected date format - self.raman_data[time_key] = date_time_obj.isoformat() + if date_time_str is not None: + date_time_obj = datetime.datetime.strptime(date_time_str, "%Y-%m-%d") + # assume UTC for .rod data, as this is not specified in detail + tzinfo = datetime.timezone.utc + if isinstance(date_time_obj, datetime.datetime): + if tzinfo is not None: + # Apply the specified timezone to the datetime object + date_time_obj = date_time_obj.replace(tzinfo=tzinfo) + + # assign the dictionary the corrrected date format + self.raman_data[time_key] = date_time_obj.isoformat() # remove capitalization objective_type_key = "_raman_measurement_device.optics_type" @@ -199,7 +206,10 @@ def get_data(self, key: str, path: str) -> Any: # this filters out the meta data, which is up to now only created for .rod files if self.missing_meta_data: - del self.missing_meta_data[path] + # this if condition is required, to only delete keys which are abaialble by the data. + # e.g. 
is defined to extract it via config.json, but there is no value in meta data + if path in self.missing_meta_data.keys(): + del self.missing_meta_data[path] if value is not None: try: diff --git a/tests/data/rod/example.nxs b/tests/data/rod/example.nxs index 55e64f1593d60ae89b710c006ff3eb5883bf9f07..9569c15c980d44c360fab33efc7ff9a08b8d1f49 100644 GIT binary patch delta 3830 zcmZ`6Yfx0@^_~mJy_eUb#@z*F7kuo3ChP(VNUBR}492J{O(&>hWk=D)%0s0U7&Cx> zqCYyrdS+Tp+X>9HW-D6v_C~~1Ek276%xdd1CdD`=rV|=WI*OBV?Dx8W>11bi&-;Am zJFk0p&)(R`-q?0GL_*oipigglCdWi$K<^p^bfzg-A7_Cgqb=?bzz}-l-%~Bw68i*0 zx@))M$DrbdSPUr+h{K~Pg)k1UD6iS$xsntkpx0Z#Fa^D;)vo@4RKoy~CJ1SqfZKxA z4OIkB#D^(XNWvy{86@L*^*uItub9iJpP>}f&>3< z;;?&!m6?sD6cAXqBef{Ry#z19;#7yBSc(99 z6q`|zs)8lo>GjN=J$o+6b)F}GP9bg{@BBZe(K-H|xbjMIT&tHOJ%S#Z#Sy2-yE2c{ zy@GE4F{dkK4jbR#FdU=n`)Ko8v|iDE4_?JIPezjS1FqrgCJ=uWpRNua1!y*ny@vfw~ORsXu>;%t$IqiSMVI>FB(D(GpzT<-6(~!Q;7)Wmg>^QAp zWQh~p7?|RceG;}^n4y{Wz1boCgwc`V2HAH2SEkKSdko>>H=8w>Fq_`XLOG(2u>{Q= zL(`UF11IFP6=Sf}ncG4&O)HI{tht019X!^wf=(o~k9X`Brg0f-= zdm7=3W8|rll=qy%cp_wdEDR3th4TIJa;HI5AM@3ib9FL$jJ? 
zyx=(jjd*K{3)*p)*QJK(apdF2yIzN84q|04=|=3xSZQSCe;#9078l8KSuws_@=LYs zZEg_6lX(GSe~|@nV{Q*#$g41#U%m`vHS8*MX%%8#`iet(2G-A0;3lRnacJfwKPce` ztTOJ+AB<5S2=!s?oafMNtfNRw^NF$Q6I3O_0*6*9sQOYmFP2wrfS!eOt--3A>a~GQ zYia{ERcp33ZEdKk2{cr%#kPg@(>K*o$y!roZCy=$-7jj_tyx>QscwrTtzmNFWnyeY zprOk6&%#qO^cwo2BVcqao&*Vs)G7{b#pNV~u%K=(AmM3IK`DhH-Jyu>W5ea-9&SMe z7t?!G2&xwYBoXa`iY}#2!h#xDMkto|@}=j<#>iNSr~cTDmcC5oO`dj}u0IAWD&Hsv ze~N!7|3J3p$Y^%bYi znLe>MokAzVb@+ZDyzrm6ps$O=MxodE5lJNYu(QRgR(vRA*m-<`(cfA!O&bjXo3EEc zrg8lJ34s1Psj!b$kgaB7v?r|V$%J3D3e&^a)SiRfk%#50OCh7Fc5To`W5DLFa`HOR z-?bVBaOQ!ta`=F#CrDU&(4l!mc$P>=|D1G$8~uOIBW`9MI86J46o8x~_jK#~C&_Z9 zfjj=PUK>pX&W&G)MMfgbpyiV-N4Z)z?l@eV<`b_uX4OU?mO9A<4xCv@VkkMg1fxe9 zWvNHk=Kw~-PC0mjdD(LeKm6-@Z6sI6Fy~k~d6D^j#%fu~!aK*#$Q8$#QN&?Wou~Ol zNOPTV$DiQ@9TV1(ttA$%Ox)CV${2sr3hGD?u}d`OBtkZRa%MHyaMj;e;LE3L;Iv4U zJaCp`L#I_ZM-a_?f=hjL%g}o`jgqq|0B&RPIRc7^hwDl|KaWgQyo|mJDqO_ZFKkdp zT9TD!COguHKfibmF405$msGgSYD}l|>g8@WrSE>+O;e+mUFS~A3}}v*llH1`~{RU?RLv1d(=Qd08&^0{u&vglLlGzD$FkALW$#(i9d5&}()fvyc7yikZOpq>n>H-2X~yHc$#s3Vb=G*q=O~MHm4|i} z1o5{YpOu4mXdURr>;04!f)==uH0la`2r#n^Zsht)Xw~6+a9@@qfchdjl1fg8c&|1i z8hxXoTUp4sL$vGYx{og?GI^*e-_EsmP05kb0-gks^)IBoQ;{>bYIuoV6W DAdP2V delta 3731 zcmZ`+e^3ay;(L&Q2Q3mLVp6OPtTI4LECtpTGRMpLmm&IW0? 
zJI)Ey@KD^3AswHPeg<1NZLTigT>VBh$4Tgkmq5m~^mJfDyc06;FJ<$cHWFo#YW`+ZpbcuJx6BC;l6Ws4-%ItgnX zG8t=~SAZpQZlj(8wo98UZKr7^me}G+IF9&zlW;uon#|NsAXd*ZvzJIXXVSVB!Q}NM zj?3fp6M}6>CIxBIhAG7BOKe>m)o64$q(;5yOll*kek3q#C#;u)VQoD15n(u$us+rd zrx9L4f97jUBUXDg6Q>ia`7p7A;}SS6H>MyH?>Myi9Cz5jiO*+BkcEYr()`(^GC8np za|?6q)@H>~BYUL)p z;ywpWsCb-~HbcyXvI9!Sf%Zrc$mp;~f*?N0*81|PnwkG~1ctH&=toD66XhHMy*V_H z*g(in$i7H66c<=Q-8An@5Zl=>k*Q=GR?ROL{d<{zb$DSeFM9P7a-0`om4sD`1m2oU z0f=2pF5{u4lEs{I_e%lgyy{(C8fg*q7UQ^<#hSW{p*9>L^-_j9N@&eu9tk~5XfqDY zuEhLh=c2aO|Ezi|K3MjWt$y>y`t7wf+Nrs5Oa1n3Z*HyLxZ}-jShC!&jx7I(hf!5o zaT)@_BJ{f}@#4y}aWPyQb75Vkx$k4B3QHDAmJmb!1)6)A9Y5_WHO+E{Mse*bN%S)$ z;34hp(_Mj}OAW4)dF@hZ4LE*uOUgKGx5_>-xdsyT!_YNf+(nZUc<|0xN!5 z1`?(Z&Jd0mGOMD*@Ho^c&@K2)V?FkDTayOQGyR}JeOS_LtFZPN$bH$Mto^wbefHU~ z($XO7fYDzueS>g=`k64HSK4JP_)g-zri|474g-cd4Z4eM+x3~2jjnVdwUkX7YaOoX zjG@wgHlQq34>vEIVVUra_YRlQSInI?Q|OjhZ{>Z5H8V{|1wMFGQ4#>#Gb-@cN4hcO zm4%_-8JxLXC>gN7r-BbT@FzXJyz8{0%%!W@EAy@AbPK_=xc|c~mWgEHf|%vl z(Gy3XAHT`iz>m^9WYarzTj->yZvH_a_)yPD+eI@ctHL?4l(M4OKUR}3*gy<7%7IP+ z%D{!=r#FCrm3^<`i@n?6f|-+{UX2U&ia5|KS$Ranb%; z!l#BSnhxsE;wvIt##gS?h~XyG%P>oK=*FLadI_%R&aR4Z)vPaHl&MaeMYX>0nO0{9 zf0M(PdMlOSZ?%E3##gboDIFn3KWD3O__`DNP`n|F*NoMUfpEfd^vbl}Ryu7we2({S zZjOuQ%;VqnZ}Q)04n;8=0<%|Jt}|<&8`N$PuH#FCG4=Wx4hnFT^tsA{Zli<&KIp`2;yQ9@ZE3*&N{(4pP4Nv$L zTkje`P#eY)dEPpVd0%a@Ok_bHV~>8b4gPInAMgD;i}$V(FS8krmtp(eaJ*qjINm(m zko!~mIDhxZAK~k8wEr-pZNjvB6z%=6R~I}`IurE6lDFQ+yayC$xD0xr1^Ps_WSSGj z_utpYQ9EnO{?)B|Lsq(H1l;wX(kXu5?j5ysN9lJG`Jm9LIajGwBXs}E4_)9^S3HcN zf5FtX-%`7kW9_3Fn$UqqC&cjAsk-m+PM~&$Cr(~|YPLP@e|igw)!H%I=Evso8ZrEt zs*a5B1m68$_O{TX2EJPp32g_4Bfyz8%5HNri-X~N!^lC4LAZ_so=9-Ho^A2)z!+-n hOq)B<90^X|8jZ?|jMIIQ&>C%-_!vLEDHc}4{{Vs2Y2^R_ diff --git a/tests/data/witec/example.nxs b/tests/data/witec/example.nxs index c90341383c1318993dca133e19fb870c1d23fe17..7a352dcb7b15bf788a9d6c6988046bd869ec8d5f 100644 GIT binary patch delta 65 zcmX@`oaMlCmJOMF62=OK23DpfRwgET=H`}W7N*(;hE@g!#*+j2Bm^-en^*B|U&Y5L GrV0RCz7Wy? 
delta 65 zcmX@`oaMlCmJOMF5(Wx}MpniqRtBbeCKg6UW+vJOhE@g!#*+j2Bm^-en^*B|U&Y5L GrV0R7st~3C