From 566fa61472718c9ad687da3790e8db9742283cb6 Mon Sep 17 00:00:00 2001
From: Robert Morgan <robert.morgan@wisc.edu>
Date: Mon, 15 Nov 2021 11:58:29 -0600
Subject: [PATCH] This commit contains the updates for version 0.0.2.1.

The main changes are as follows:

1. If sampling redshifts from USERDISTs or DISTRIBUTIONs caused unphysical geometries (e.g. PLANE_2 having a REDSHIFT less than PLANE_1), the simulated system is deleted from the images and metadata.
2. The BACKGROUNDS section of the configuration file now accepts an ITERATE key for which the value is unused (set it to None). If ITERATE mode is used, the images are used in the simulations iteratively which ensures that all images get used.
---
 HISTORY.rst                        |  5 ++++
 deeplenstronomy/deeplenstronomy.py | 32 +++++++++++++++-----
 deeplenstronomy/input_reader.py    | 48 +++++++++++++++++++++++++++++-
 deeplenstronomy/utils.py           | 37 +++++++++++++++++++----
 setup.py                           |  2 +-
 5 files changed, 108 insertions(+), 16 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 1a9f2f9..d9370fb 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -3,6 +3,11 @@
 History
 -------
 
+0.0.2.1 (2021-11-15)
++++++++++++++++++++++
+* ITERATE option for image BACKGROUNDS
+* Drop any systems with unphysical redshifts from simulations
+
 0.0.2.0 (2021-05-07)
 +++++++++++++++++++++
 * Verified stability of all new time series features
diff --git a/deeplenstronomy/deeplenstronomy.py b/deeplenstronomy/deeplenstronomy.py
index 75450b1..bb8f1a6 100644
--- a/deeplenstronomy/deeplenstronomy.py
+++ b/deeplenstronomy/deeplenstronomy.py
@@ -11,7 +11,7 @@
 
 from deeplenstronomy.input_reader import Organizer, Parser
 from deeplenstronomy.image_generator import ImageGenerator
-from deeplenstronomy.utils import draw_from_user_dist, organize_image_backgrounds, read_images, check_background_indices
+from deeplenstronomy.utils import draw_from_user_dist, organize_image_backgrounds, read_images, check_background_indices, treat_map_like_user_dist
 from deeplenstronomy import surveys
 
 class Dataset():
@@ -338,6 +338,13 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
     # Store configurations
     dataset.configurations = list(dataset.config_dict['GEOMETRY'].keys())
 
+    # Handle image backgrounds if they exist
+    if len(parser.image_paths) > 0:
+        im_dir = parser.config_dict['BACKGROUNDS']["PATH"]
+        image_backgrounds = read_images(im_dir, parser.config_dict['IMAGE']['PARAMETERS']['numPix'], dataset.bands)
+    else:
+        image_backgrounds = np.zeros((len(dataset.bands), parser.config_dict['IMAGE']['PARAMETERS']['numPix'], parser.config_dict['IMAGE']['PARAMETERS']['numPix']))[np.newaxis,:]
+    
     # If user-specified distributions exist, draw from them
     forced_inputs = {}
     max_size = dataset.size * 100 # maximum 100 epochs if timeseries
@@ -352,6 +359,11 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
         draw_param_names, draw_param_values = draw_from_user_dist(filename, max_size, mode, step)
         forced_inputs[filename] = {'names': draw_param_names, 'values': draw_param_values}
 
+    # If we want to iterate through map.txt, add the parameters to the forced inputs
+    if "ITERATE" in parser.config_dict['BACKGROUNDS']:
+        im_dir = parser.config_dict['BACKGROUNDS']["PATH"]
+        draw_param_names, draw_param_values = treat_map_like_user_dist(im_dir, max_size)
+        forced_inputs[im_dir + '/map.txt'] = {'names': draw_param_names, 'values': draw_param_values}
         
     # Overwrite the configuration dict with any forced values from user distribtuions
     force_param_inputs = _get_forced_sim_inputs(forced_inputs, dataset.configurations, dataset.bands)
@@ -379,13 +391,6 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
     # Initialize the ImageGenerator
     ImGen = ImageGenerator(return_planes, solve_lens_equation)
 
-    # Handle image backgrounds if they exist
-    if len(parser.image_paths) > 0:
-        im_dir = parser.config_dict['BACKGROUNDS']["PATH"]
-        image_backgrounds = read_images(im_dir, parser.config_dict['IMAGE']['PARAMETERS']['numPix'], dataset.bands)
-    else:
-        image_backgrounds = np.zeros((len(dataset.bands), parser.config_dict['IMAGE']['PARAMETERS']['numPix'], parser.config_dict['IMAGE']['PARAMETERS']['numPix']))[np.newaxis,:]
-
     # Clear the sim_dicts out of memory
     if not os.path.exists(dataset.outdir):
         os.system('mkdir ' + dataset.outdir)
@@ -506,6 +511,17 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
         metadata_df = pd.DataFrame(metadata)
         del metadata
 
+        # Delete images with sentinel redshifts
+        redshift_cols = [x for x in metadata_df.columns if x.find('REDSHIFT') != -1]
+        plane_cols = {int(x[x.index('PLANE_') + 6: x.index('-', x.index('PLANE_') + 1)]) : x for x in redshift_cols}
+        greatest_plane_col = plane_cols[max(plane_cols.keys())]
+        mask = metadata_df[greatest_plane_col].values < 10
+        configuration_images = configuration_images[mask]
+        metadata_df = metadata_df[mask].copy().reset_index(drop=True)
+        if return_planes:
+            configuration_planes = configuration_planes[mask]
+                      
+        
         # Save the images and metadata to the outdir if desired (ideal for large simulation production)
         if save_to_disk:
             #Images
diff --git a/deeplenstronomy/input_reader.py b/deeplenstronomy/input_reader.py
index 1fab03c..273c21e 100644
--- a/deeplenstronomy/input_reader.py
+++ b/deeplenstronomy/input_reader.py
@@ -364,6 +364,23 @@ def _flatten_and_fill(self, config_dict, cosmo, inputs, objid=0):
 
                     # Set the PLANE's redshift in the config_dict
                     if k_param == 'REDSHIFT':
+                        # Check if drawn redshift is less than a closer plane, and redraw if necessary
+                        if plane_num >= 2:
+                            prev_plane_num = plane_num - 1
+                            tries = 0
+                            while tries < 10:
+                                if config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] >= draws[0]:
+                                    draws = self._draw(v_param['DISTRIBUTION'], bands)
+                                    tries += 1
+                                else:
+                                    break
+                            else:
+                                # Set a sentinel value to drop this system
+                                if config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] >= 10:
+                                    draws = [config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] + 1] * len(bands)
+                                else:
+                                    draws = [10] * len(bands)
+                                    
                         config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = draws[0]
                     
                     for band, draw in zip(bands, draws):
@@ -372,7 +389,20 @@ def _flatten_and_fill(self, config_dict, cosmo, inputs, objid=0):
                 else:
                     # Set the PLANE's redshift in the config_dict
                     if k_param == 'REDSHIFT':
-                        config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)]	= v_param
+                        # set the redshift to a sentinel value if it's less than a previous plane
+                        if plane_num >= 2:
+                            prev_plane_num = plane_num - 1
+                            if v_param < config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)]:
+                                if config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] >= 10:
+                                    config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] + 1
+                                    redshift = config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] + 1
+                                else:
+                                    config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = 10
+                                    redshift = 10
+
+                            else:
+                                config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)]	= v_param
+                        
                     
                     for band in bands:
                         for obj_num in range(1, config_dict['SIM_DICT']['PLANE_{0}-NUMBER_OF_OBJECTS'.format(plane_num)] + 1):
@@ -521,6 +551,22 @@ def _flatten_and_fill(self, config_dict, cosmo, inputs, objid=0):
             
                 if param_name in output_dict[band]:
                     output_dict[band][param_name] = inputs[(param_name, band)]
+
+                    # Protect against non-physical geometries by setting a sentinel value
+                    if param_name.find('REDSHIFT') != -1:
+                        plane_num = int(param_name.split('PLANE_')[1].split('-')[0])
+                        if plane_num >= 2:
+                            for prev_plane_num in range(1, plane_num):
+                                prev_plane_param_name = param_name.replace('PLANE_{0}'.format(plane_num), 'PLANE_{0}'.format(prev_plane_num))
+                                if output_dict[band][param_name] <= output_dict[band][prev_plane_param_name]:
+                                    if output_dict[band][prev_plane_param_name] >= 10:
+                                        new_redshift = output_dict[band][prev_plane_param_name] + 1
+                                    else:
+                                        new_redshift = 10
+
+                                    for b in output_dict.keys():
+                                        output_dict[b][param_name] = new_redshift
+                    
                 else:
                     print("WARNING: " + param_name + " is not present in the simulated dataset and may produce unexpected behavior. Use dataset.search(<param name>) to find all expected names")
 
diff --git a/deeplenstronomy/utils.py b/deeplenstronomy/utils.py
index 0814876..eea184f 100644
--- a/deeplenstronomy/utils.py
+++ b/deeplenstronomy/utils.py
@@ -131,9 +131,21 @@ def read_distribution_file(filename):
     assert 'WEIGHT' in df.columns, "'WEIGHT' must be a column in {}".format(filename)
 
     return df
-        
 
-def draw_from_user_dist(filename, size, mode, step=10):
+
+def treat_map_like_user_dist(im_dir, size):
+    """Use the iterate mode of draw_from_user_dist for map.txt.
+
+    Args:
+        im_dir (str): name of directory containing map.txt
+        size (int): size of simulations
+    """
+    df = pd.read_csv(im_dir + '/' + 'map.txt', delim_whitespace=True)
+    df['WEIGHT'] = 1.0
+    return draw_from_user_dist("unused filename", size, 'iterate', df=df)
+    
+
+def draw_from_user_dist(filename, size, mode, step=10, df=None):
     """
     Interpolate a user-specified N-dimensional probability distribution and
     sample from it.
@@ -143,16 +155,18 @@ def draw_from_user_dist(filename, size, mode, step=10):
         size (int):  the number of times to sample the probability distribution 
         mode (str): choose from ['interpolate', 'sample'] 
         step (int): the number of steps on the interpolation grid  
+        df (pd.DataFrame): optional already read dataframe.
         
     Returns:
         parameters: list, the names of the paramters
         choices: array with entries as arrays of drawn parameters 
 
     Raises:
-        NotImplementedError: if a mode other than "sample" or "interpolate" is passed
+        NotImplementedError: if a mode other than "sample", "interpolate", or "iterate" is passed
     """
 
-    df = read_distribution_file(filename)
+    if df is None:
+        df = read_distribution_file(filename)
 
     parameters = [x for x in df.columns if x != 'WEIGHT']
     points = df[parameters].values
@@ -184,11 +198,17 @@ def draw_from_user_dist(filename, size, mode, step=10):
         index_arr = np.random.choice(np.arange(len(points), dtype=int), size=size, p=weights / weights.sum())
         choices = points[index_arr]
 
+    elif mode == 'iterate':
+        num_repeats = size // len(df) + 1
+        index_arr = np.tile(np.arange(len(df)), num_repeats)[:size]
+        choices = points[index_arr]
+        
     else:
         raise NotImplementedError("unexpected mode passed, must be 'sample' or 'interpolate'")
             
     return parameters, choices
 
+
 def read_images(im_dir, im_size, bands):
     """
     Read images into memory and resize to match simulations.
@@ -237,7 +257,7 @@ def read_images(im_dir, im_size, bands):
 
     return im_array
 
-def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configuration):
+def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configuration, overwrite=False):
     """
     Sort image files based on map. If no map exists, sort randomly.
 
@@ -246,6 +266,7 @@ def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configurat
         image_bank_size (int): number of images in user-specified bank
         config_dicts (List[dict]): list of config_dicts    
         configuration (str): the configuration currently running
+        overwrite (bool): optionally overwrite sim_inputs instead of solving
     
     Returns:
         the indices of the images utilized for each config_dict 
@@ -277,13 +298,17 @@ def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configurat
 
 
         if len(bad_columns) != 0:
-            print(config_dicts[0].keys())
             print("WARNING {0} are not found in the simulated dataset for {1}".format(', '.join(bad_columns), configuration) +
                   ". You may see unexpected results. Use the dataset.search(<param_name>) function to find the correct column names.")
         
     if len(map_columns) == 0:
         # Sort randomly
         image_indices = np.random.choice(np.arange(image_bank_size), replace=True, size=len(config_dicts))
+
+    elif overwrite:
+        # Maintain order for iterative insertion.
+        num_repeats = len(config_dicts) // image_bank_size + 1
+        image_indices = np.tile(np.arange(image_bank_size), num_repeats)[:len(config_dicts)]
     
     else:
         # Trim df to just the columns needed
diff --git a/setup.py b/setup.py
index ce2d513..39160ba 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
 
 setup(
     name='deeplenstronomy',
-    version='0.0.2.0',
+    version='0.0.2.1',
     description='wrap lenstronomy for efficient simulation generation',
     long_description=long_description,
     long_description_content_type='text/markdown',