@@ -53,6 +53,7 @@ class SST1RSoXSDB:
53
53
}
54
54
md_secondary_lookup = {
55
55
"energy" : "en_monoen_setpoint" ,
56
+ "exposure" : "Small Angle CCD Detector_cam_acquire_time" ,
56
57
}
57
58
58
59
def __init__ (
@@ -258,6 +259,7 @@ def searchCatalog(
258
259
scan_id : int = None ,
259
260
userOutputs : list = [],
260
261
debugWarnings : bool = False ,
262
+ existingCatalog : pd .DataFrame = None ,
261
263
** kwargs ,
262
264
) -> pd .DataFrame :
263
265
"""Search the Bluesky catalog for scans matching all provided keywords and return metadata as a dataframe.
@@ -300,6 +302,7 @@ def searchCatalog(
300
302
plan (str, optional): Measurement Plan, case-insensitive, regex search,
301
303
e.g., "Full" matches "full_carbon_scan_nd", "full_fluorine_scan_nd"
302
304
e.g., "carbon|oxygen|fluorine" matches carbon OR oxygen OR fluorine scans
305
+ scan_id (int, optional): Scan ID, exact numeric match, e.g., 12345
303
306
**kwargs: Additional search terms can be provided as keyword args and will further filter
304
307
the catalog. Valid input follows metadataLabel='searchTerm' or metadataLabel = ['searchTerm','matchType'].
305
308
Metadata labels must match an entry in the 'start' dictionary of the catalog. Supported match types are
@@ -319,6 +322,7 @@ def searchCatalog(
319
322
r'catalog.stop["num_events"]']
320
323
e.g., userOutputs = [["Exposure Multiplier","exptime", r'catalog.start'], ["Stop Time","time",r'catalog.stop']]
321
324
debugWarnings (bool, optional): if True, raises a warning with debugging information whenever a key can't be found.
325
+ existingCatalog (pd.DataFrame, optional): if provided, catalog entries whose key already appears in the 'uid' column of this dataframe are skipped and not re-downloaded.
322
326
Returns:
323
327
Pandas dataframe containing the results of the search, or an empty dataframe if the search fails
324
328
"""
@@ -418,7 +422,6 @@ def searchCatalog(
418
422
# List elements are [Output Column Title, Bluesky Metadata Code, Metadata Source location, Applicable Output flag]
419
423
outputValueLibrary = [
420
424
["scan_id" , "scan_id" , r"catalog.start" , "default" ],
421
- ["uid" , "uid" , r"catalog.start" , "ext_bio" ],
422
425
["start_time" , "time" , r"catalog.start" , "default" ],
423
426
["cycle" , "cycle" , r"catalog.start" , "default" ],
424
427
["saf" , "SAF" , r"catalog.start" , "ext_bio" ],
@@ -434,6 +437,7 @@ def searchCatalog(
434
437
["sample_rotation" , "angle" , r"catalog.start" , "ext_msmt" ],
435
438
["exit_status" , "exit_status" , r"catalog.stop" , "default" ],
436
439
["num_Images" , "primary" , r'catalog.stop["num_events"]' , "default" ],
440
+ ["uid" , "uid" , r"catalog.start" , "default" ],
437
441
]
438
442
439
443
# Subset the library based on the output flag selected
@@ -478,11 +482,18 @@ def searchCatalog(
478
482
479
483
# Build output dataframe as a list of lists
480
484
outputList = []
481
-
485
+
482
486
# Outer loop: Catalog entries
483
- for scanEntry in tqdm (reducedCatalog .values (), desc = "Retrieving results... " ):
487
+ for scanEntry in tqdm (reducedCatalog .items (), desc = "Retrieving results" ):
484
488
singleScanOutput = []
485
489
490
+ if existingCatalog is not None :
491
+ if scanEntry [0 ] in existingCatalog .uid .values :
492
+ # if the scan is already in the catalog, skip it
493
+ continue
494
+
495
+ scanEntry = scanEntry [1 ]
496
+
486
497
# Pull the start and stop docs once
487
498
488
499
currentCatalogStart = scanEntry .start
@@ -1032,23 +1043,28 @@ def loadMonitors(
1032
1043
1033
1044
monitors = None
1034
1045
1046
+ monitor_accumulator = []
1047
+
1035
1048
# Iterate through the list of streams held by the Bluesky document 'entry'
1036
1049
for stream_name in list (entry .keys ()):
1037
1050
# Add monitor streams to the output xr.Dataset
1038
1051
if "monitor" in stream_name :
1039
- if monitors is None : # First one
1040
- # incantation to extract the dataset from the bluesky stream
1041
- monitors = entry [stream_name ].data .read ()
1042
- else : # merge into the to existing output xarray
1043
- monitors = xr .merge ((monitors , entry [stream_name ].data .read ()))
1052
+ monitor_accumulator .append (entry [stream_name ].data .read ())
1053
+ #if monitors is None: # First one
1054
+ # # incantation to extract the dataset from the bluesky stream
1055
+ # monitors = entry[stream_name].data.read()
1056
+ #else: # merge into the existing output xarray
1057
+ # monitors = xr.merge((monitors, entry[stream_name].data.read()))
1058
+ # if there are no monitors, return an empty xarray Dataset
1059
+ if len (monitor_accumulator ) == 0 :
1060
+ return xr .Dataset ()
1044
1061
1062
+ monitors = xr .merge (monitor_accumulator )
1063
+
1045
1064
# At this stage monitors has dimension time and all streams as data variables
1046
1065
# the time dimension inherited all time values from all streams
1047
1066
# the data variables (Mesh current, sample current etc.) are all sparse, with lots of nans
1048
1067
1049
- # if there are no monitors, return an empty xarray Dataset
1050
- if monitors is None :
1051
- return xr .Dataset ()
1052
1068
1053
1069
# For each nan value, replace with the closest value ahead of it in time
1054
1070
# For remaining nans, replace with closest value behind it in time
@@ -1185,6 +1201,7 @@ def loadMd(self, run):
1185
1201
stacklevel = 2 ,
1186
1202
)
1187
1203
1204
+
1188
1205
if md ["rsoxs_config" ] == "saxs" :
1189
1206
md ["detector" ] = "Small Angle CCD Detector"
1190
1207
elif md ["rsoxs_config" ] == "waxs" :
0 commit comments