Skip to content

Commit 3f57ce5

Browse files
committed
Add diode finding function to loader
1 parent e41778e commit 3f57ce5

File tree

1 file changed

+28
-11
lines changed

1 file changed

+28
-11
lines changed

src/PyHyperScattering/SST1RSoXSDB.py

Lines changed: 28 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ class SST1RSoXSDB:
5353
}
5454
md_secondary_lookup = {
5555
"energy": "en_monoen_setpoint",
56+
"exposure": "Small Angle CCD Detector_cam_acquire_time",
5657
}
5758

5859
def __init__(
@@ -258,6 +259,7 @@ def searchCatalog(
258259
scan_id: int = None,
259260
userOutputs: list = [],
260261
debugWarnings: bool = False,
262+
existingCatalog: pd.DataFrame = None,
261263
**kwargs,
262264
) -> pd.DataFrame:
263265
"""Search the Bluesky catalog for scans matching all provided keywords and return metadata as a dataframe.
@@ -300,6 +302,7 @@ def searchCatalog(
300302
plan (str, optional): Measurement Plan, case-insensitive, regex search,
301303
e.g., "Full" matches "full_carbon_scan_nd", "full_fluorine_scan_nd"
302304
e.g., "carbon|oxygen|fluorine" matches carbon OR oxygen OR fluorine scans
305+
scan_id (int, optional): Scan ID, exact numeric match, e.g., 12345
303306
**kwargs: Additional search terms can be provided as keyword args and will further filter
304307
the catalog. Valid input follows metadataLabel='searchTerm' or metadataLabel=['searchTerm','matchType'].
305308
Metadata labels must match an entry in the 'start' dictionary of the catalog. Supported match types are
@@ -319,6 +322,7 @@ def searchCatalog(
319322
r'catalog.stop["num_events"]']
320323
e.g., userOutputs = [["Exposure Multiplier","exptime", r'catalog.start'], ["Stop Time","time",r'catalog.stop']]
321324
debugWarnings (bool, optional): if True, raises a warning with debugging information whenever a key can't be found.
325+
existingCatalog (pd.DataFrame, optional): if provided, catalog entries whose uid already appears in this dataframe's 'uid' column are skipped and not re-downloaded.
322326
Returns:
323327
Pandas dataframe containing the results of the search, or an empty dataframe if the search fails
324328
"""
@@ -418,7 +422,6 @@ def searchCatalog(
418422
# List elements are [Output Column Title, Bluesky Metadata Code, Metadata Source location, Applicable Output flag]
419423
outputValueLibrary = [
420424
["scan_id", "scan_id", r"catalog.start", "default"],
421-
["uid", "uid", r"catalog.start", "ext_bio"],
422425
["start_time", "time", r"catalog.start", "default"],
423426
["cycle", "cycle", r"catalog.start", "default"],
424427
["saf", "SAF", r"catalog.start", "ext_bio"],
@@ -434,6 +437,7 @@ def searchCatalog(
434437
["sample_rotation", "angle", r"catalog.start", "ext_msmt"],
435438
["exit_status", "exit_status", r"catalog.stop", "default"],
436439
["num_Images", "primary", r'catalog.stop["num_events"]', "default"],
440+
["uid", "uid", r"catalog.start", "default"],
437441
]
438442

439443
# Subset the library based on the output flag selected
@@ -478,11 +482,18 @@ def searchCatalog(
478482

479483
# Build output dataframe as a list of lists
480484
outputList = []
481-
485+
482486
# Outer loop: Catalog entries
483-
for scanEntry in tqdm(reducedCatalog.values(), desc="Retrieving results..."):
487+
for scanEntry in tqdm(reducedCatalog.items(), desc="Retrieving results"):
484488
singleScanOutput = []
485489

490+
if existingCatalog is not None:
491+
if scanEntry[0] in existingCatalog.uid.values:
492+
# if the scan is already in the catalog, skip it
493+
continue
494+
495+
scanEntry = scanEntry[1]
496+
486497
# Pull the start and stop docs once
487498

488499
currentCatalogStart = scanEntry.start
@@ -1032,23 +1043,28 @@ def loadMonitors(
10321043

10331044
monitors = None
10341045

1046+
monitor_accumulator = []
1047+
10351048
# Iterate through the list of streams held by the Bluesky document 'entry'
10361049
for stream_name in list(entry.keys()):
10371050
# Add monitor streams to the output xr.Dataset
10381051
if "monitor" in stream_name:
1039-
if monitors is None: # First one
1040-
# incantation to extract the dataset from the bluesky stream
1041-
monitors = entry[stream_name].data.read()
1042-
else: # merge into the to existing output xarray
1043-
monitors = xr.merge((monitors, entry[stream_name].data.read()))
1052+
monitor_accumulator.append(entry[stream_name].data.read())
1053+
#if monitors is None: # First one
1054+
# # incantation to extract the dataset from the bluesky stream
1055+
# monitors = entry[stream_name].data.read()
1056+
#else: # merge into the existing output xarray
1057+
# monitors = xr.merge((monitors, entry[stream_name].data.read()))
1058+
# if there are no monitors, return an empty xarray Dataset
1059+
if len(monitor_accumulator) == 0:
1060+
return xr.Dataset()
10441061

1062+
monitors = xr.merge(monitor_accumulator)
1063+
10451064
# At this stage monitors has dimension time and all streams as data variables
10461065
# the time dimension inherited all time values from all streams
10471066
# the data variables (Mesh current, sample current etc.) are all sparse, with lots of nans
10481067

1049-
# if there are no monitors, return an empty xarray Dataset
1050-
if monitors is None:
1051-
return xr.Dataset()
10521068

10531069
# For each nan value, replace with the closest value ahead of it in time
10541070
# For remaining nans, replace with closest value behind it in time
@@ -1185,6 +1201,7 @@ def loadMd(self, run):
11851201
stacklevel=2,
11861202
)
11871203

1204+
11881205
if md["rsoxs_config"] == "saxs":
11891206
md["detector"] = "Small Angle CCD Detector"
11901207
elif md["rsoxs_config"] == "waxs":

0 commit comments

Comments
 (0)