Commit

np_array mandatory, downloading images is 50% of work, dumber operation progress but boost to performance with not checking the cache so often
LTDakin committed Oct 4, 2024
1 parent 6130cb0 commit 69ae917
Showing 7 changed files with 11 additions and 21 deletions.
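
The commit message describes the new progress model: downloading the input FITS files is treated as the first 50% of an operation's work and is reported from inside get_fits_npdata, while the remaining steps write a few fixed checkpoints instead of reading the stored progress back and incrementing it. A minimal sketch of the pattern the changed operations now follow; the processing step and its name are assumptions rather than code from this commit:

def operate(self):
    input_files = self.input_data.get('input_files', [])

    # downloading: get_fits_npdata now reports progress itself, ramping toward 0.5
    image_data_list = self.get_fits_npdata(input_files)

    result = self.process_images(image_data_list)  # hypothetical processing step
    self.set_operation_progress(0.80)              # fixed checkpoint, no get_operation_progress() read

    self.set_output(result)

Writing absolute values means the per-iteration get_operation_progress() calls of the old code disappear, which appears to be the "not checking the cache so often" performance gain the message refers to.
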
2 changes: 2 additions & 0 deletions datalab/datalab_session/data_operations/data_operation.py
@@ -110,4 +110,6 @@ def get_fits_npdata(self, input_files: list[dict]) -> list[np.memmap]:
sci_hdu = get_hdu(fits_path, 'SCI')
image_data_list.append(sci_hdu.data)

+ self.set_operation_progress(index / len(input_files) * 0.5)

return image_data_list
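
Only the tail of get_fits_npdata is visible in the hunk above. A sketch of how the whole helper plausibly reads after the change; the loop header, the download call, and the variable names outside the shown lines are assumptions:

def get_fits_npdata(self, input_files: list[dict]) -> list[np.memmap]:
    image_data_list = []
    for index, file_info in enumerate(input_files):        # assumed loop structure
        fits_path = get_fits(file_info.get('basename'))    # assumed download step
        sci_hdu = get_hdu(fits_path, 'SCI')
        image_data_list.append(sci_hdu.data)

        # downloading counts as the first half of the operation's progress
        self.set_operation_progress(index / len(input_files) * 0.5)

    return image_data_list

If the index is zero-based, the reported value tops out just below 0.5 on the last file; the fixed checkpoints in each operation then carry progress the rest of the way.
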
1 change: 0 additions & 1 deletion datalab/datalab_session/data_operations/median.py
@@ -49,7 +49,6 @@ def operate(self):
log.info(f'Executing median operation on {len(input)} files')

image_data_list = self.get_fits_npdata(input)
- self.set_operation_progress(0.40)

cropped_data_list = crop_arrays(image_data_list)
stacked_data = np.stack(cropped_data_list, axis=2)
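
The hunk above only drops the hard-coded 0.40 checkpoint, since get_fits_npdata now covers the download phase. For reference, the crop-and-stack lines shown build a 3-D cube that the rest of operate (not shown here) presumably reduces with a per-pixel median; a tiny standalone illustration of that idea:

import numpy as np

# three same-shaped "images"
a = np.ones((2, 2))
b = np.full((2, 2), 3.0)
c = np.full((2, 2), 5.0)

cube = np.stack([a, b, c], axis=2)       # shape (2, 2, 3): inputs stacked along axis 2
median_image = np.median(cube, axis=2)   # per-pixel median across the inputs (all 3.0 here)
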
5 changes: 2 additions & 3 deletions datalab/datalab_session/data_operations/normalization.py
@@ -45,7 +45,6 @@ def operate(self):
log.info(f'Executing normalization operation on {len(input)} file(s)')

image_data_list = self.get_fits_npdata(input)
- self.set_operation_progress(0.40)

output_files = []
for index, image in enumerate(image_data_list):
@@ -54,8 +53,8 @@

output = create_output(self.cache_key, normalized_image, index=index, comment=f'Product of Datalab Normalization on file {input[index]["basename"]}')
output_files.append(output)

- self.set_operation_progress(self.get_operation_progress() + .40 * (index + 1) / len(input))
+ self.set_operation_progress(0.80)

self.set_output(output_files)
log.info(f'Normalization output: {self.get_output()}')
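
The replaced progress line is where the "not checking the cache so often" note in the commit message shows up most clearly: the old update read the current value back with get_operation_progress() for every file, while the new code writes a fixed value. A sketch of the loop after the change, with the normalization step itself assumed (it is not part of this hunk) and the checkpoint drawn at the old line's position, since the extract does not preserve indentation:

output_files = []
for index, image in enumerate(image_data_list):
    normalized_image = image / image.max()   # assumed normalization step
    output = create_output(self.cache_key, normalized_image, index=index,
                           comment=f'Product of Datalab Normalization on file {input[index]["basename"]}')
    output_files.append(output)

    # old: self.set_operation_progress(self.get_operation_progress() + .40 * (index + 1) / len(input))
    self.set_operation_progress(0.80)        # new: one fixed value, no read-back

self.set_output(output_files)
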
5 changes: 2 additions & 3 deletions datalab/datalab_session/data_operations/rgb_stack.py
@@ -65,9 +65,9 @@ def operate(self):
log.info(f'Executing RGB Stack operation on files: {rgb_input_list}')

fits_paths = []
- for file in rgb_input_list:
+ for index, file in enumerate(rgb_input_list, start=1):
fits_paths.append(get_fits(file.get('basename')))
- self.set_operation_progress(self.get_operation_progress() + 0.2)
+ self.set_operation_progress(index * 0.2)

large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_paths, color=True)

@@ -81,6 +81,5 @@
rgb_comment = f'Product of Datalab RGB Stack on files {", ".join([image["basename"] for image in rgb_input_list])}'
output = create_output(self.cache_key, stacked_data, large_jpg=large_jpg_path, small_jpg=small_jpg_path, comment=rgb_comment)

- self.set_operation_progress(1.0)
self.set_output(output)
log.info(f'RGB Stack output: {self.get_output()}')
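
With enumerate(rgb_input_list, start=1) the download loop writes absolute progress values, 0.2, 0.4, 0.6 for the three RGB inputs, without the get_operation_progress() read the old accumulate-by-0.2 version needed on each pass. The trailing set_operation_progress(1.0) is dropped as well, leaving set_output as the natural end of the operation. A condensed view of the loop:

fits_paths = []
for index, file in enumerate(rgb_input_list, start=1):
    fits_paths.append(get_fits(file.get('basename')))
    # old: self.set_operation_progress(self.get_operation_progress() + 0.2)   # read + write each pass
    self.set_operation_progress(index * 0.2)                                  # write only
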
4 changes: 0 additions & 4 deletions datalab/datalab_session/data_operations/stacking.py
@@ -51,16 +51,12 @@ def operate(self):

image_data_list = self.get_fits_npdata(input_files)

- self.set_operation_progress(0.4)

cropped_data = crop_arrays(image_data_list)
stacked_data = np.stack(cropped_data, axis=2)

- self.set_operation_progress(0.6)

# using the numpy library's sum method
stacked_sum = np.sum(stacked_data, axis=2)

- self.set_operation_progress(0.8)

stacking_comment = f'Product of Datalab Stacking. Stack of {", ".join([image["basename"] for image in input_files])}'
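
The stacking operation keeps only its download-driven progress and the final output, dropping the 0.4/0.6/0.8 checkpoints. Functionally it relies on every input sharing a shape before np.stack will accept it, which is what crop_arrays is for; a standalone illustration of the idea, where the crop-to-smallest-common-shape helper is a hypothetical stand-in rather than the project's crop_arrays:

import numpy as np

def crop_to_common_shape(arrays):
    # hypothetical: trim every array to the smallest height/width present
    min_h = min(a.shape[0] for a in arrays)
    min_w = min(a.shape[1] for a in arrays)
    return [a[:min_h, :min_w] for a in arrays]

images = [np.ones((4, 5)), np.ones((3, 6)), np.ones((5, 5))]
cropped = crop_to_common_shape(images)   # every array becomes (3, 5)
stacked = np.stack(cropped, axis=2)      # shape (3, 5, 3)
summed = np.sum(stacked, axis=2)         # per-pixel sum, shape (3, 5), all values 3.0
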
9 changes: 3 additions & 6 deletions datalab/datalab_session/data_operations/subtraction.py
@@ -50,9 +50,7 @@ def wizard_description():
def operate(self):

input_files = self.input_data.get('input_files', [])
- print(f'Input files: {input_files}')
subtraction_file_input = self.input_data.get('subtraction_file', [])
- print(f'Subtraction file: {subtraction_file_input}')

if not subtraction_file_input:
raise ClientAlertException('Missing a subtraction file')
@@ -63,10 +61,9 @@ def operate(self):
log.info(f'Executing subtraction operation on {len(input_files)} files')

input_image_data_list = self.get_fits_npdata(input_files)
- self.set_operation_progress(.30)

subtraction_image = self.get_fits_npdata(subtraction_file_input)[0]
- self.set_operation_progress(.40)
+ self.set_operation_progress(0.70)

outputs = []
for index, input_image in enumerate(input_image_data_list):
@@ -77,8 +74,8 @@

subtraction_comment = f'Product of Datalab Subtraction of {subtraction_file_input[0]["basename"]} subtracted from {input_files[index]["basename"]}'
outputs.append(create_output(self.cache_key, difference_array, index=index, comment=subtraction_comment))

- self.set_operation_progress(self.get_operation_progress() + .50 * (index + 1) / len(input_files))
+ self.set_operation_progress(0.90)

self.set_output(outputs)
log.info(f'Subtraction output: {self.get_output()}')
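
Subtraction follows the same pattern: the debug prints go away, download progress comes from get_fits_npdata, a 0.70 checkpoint marks the subtraction frame being loaded, and a single 0.90 write replaces the per-file read-and-increment. A sketch of the per-file loop; the elementwise difference is an assumption (the actual computation sits in lines the diff does not show), and the 0.90 checkpoint is drawn after the loop even though the extract does not preserve its indentation:

outputs = []
for index, input_image in enumerate(input_image_data_list):
    difference_array = input_image - subtraction_image   # assumed elementwise subtraction

    subtraction_comment = f'Product of Datalab Subtraction of {subtraction_file_input[0]["basename"]} subtracted from {input_files[index]["basename"]}'
    outputs.append(create_output(self.cache_key, difference_array, index=index, comment=subtraction_comment))

self.set_operation_progress(0.90)
self.set_output(outputs)
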
6 changes: 2 additions & 4 deletions datalab/datalab_session/file_utils.py
@@ -110,15 +110,13 @@ def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_poi

return x_points, y_points

- def create_output(cache_key, np_array=None, fits_file=None, large_jpg=None, small_jpg=None, index=None, comment=None):
+ def create_output(cache_key, np_array, large_jpg=None, small_jpg=None, index=None, comment=None):
"""
A more automated way of creating output for a dev
Dev can specify just a cache_key and np array and the function will create the fits and jpgs
or the dev can pass the fits_file or jpgs and the function will save them
"""

- if np_array is not None and fits_file is None:
-     fits_file = create_fits(cache_key, np_array, comment)
+ fits_file = create_fits(cache_key, np_array, comment)

if not large_jpg or not small_jpg:
large_jpg, small_jpg = create_jpgs(cache_key, fits_file)
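
This is the "np_array mandatory" half of the commit: create_output no longer accepts a pre-built fits_file and always builds the FITS from the array it is given, generating the JPEGs from that FITS when they are not supplied. A hedged usage sketch, with the cache key and array invented purely for illustration:

import numpy as np

# hypothetical call site
difference = np.zeros((100, 100), dtype=np.float32)
output = create_output(
    'example-cache-key',   # cache_key
    difference,            # np_array is now a required positional argument
    index=0,
    comment='Product of Datalab Subtraction (example)',
)
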
