diff --git a/HISTORY.rst b/HISTORY.rst index 1a9f2f9..d9370fb 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,11 @@ History ------- +0.0.2.1 (2021-11-15) ++++++++++++++++++++++ +* ITERATE option for image BACKGROUNDS +* Drop any systems with unphysical redshifts from simulations + 0.0.2.0 (2021-05-07) +++++++++++++++++++++ * Verified stability of all new time series features diff --git a/deeplenstronomy/deeplenstronomy.py b/deeplenstronomy/deeplenstronomy.py index 75450b1..bb8f1a6 100644 --- a/deeplenstronomy/deeplenstronomy.py +++ b/deeplenstronomy/deeplenstronomy.py @@ -11,7 +11,7 @@ from deeplenstronomy.input_reader import Organizer, Parser from deeplenstronomy.image_generator import ImageGenerator -from deeplenstronomy.utils import draw_from_user_dist, organize_image_backgrounds, read_images, check_background_indices +from deeplenstronomy.utils import draw_from_user_dist, organize_image_backgrounds, read_images, check_background_indices, treat_map_like_user_dist from deeplenstronomy import surveys class Dataset(): @@ -338,6 +338,13 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True, # Store configurations dataset.configurations = list(dataset.config_dict['GEOMETRY'].keys()) + # Handle image backgrounds if they exist + if len(parser.image_paths) > 0: + im_dir = parser.config_dict['BACKGROUNDS']["PATH"] + image_backgrounds = read_images(im_dir, parser.config_dict['IMAGE']['PARAMETERS']['numPix'], dataset.bands) + else: + image_backgrounds = np.zeros((len(dataset.bands), parser.config_dict['IMAGE']['PARAMETERS']['numPix'], parser.config_dict['IMAGE']['PARAMETERS']['numPix']))[np.newaxis,:] + # If user-specified distributions exist, draw from them forced_inputs = {} max_size = dataset.size * 100 # maximum 100 epochs if timeseries @@ -352,6 +359,11 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True, draw_param_names, draw_param_values = draw_from_user_dist(filename, max_size, mode, step) 
forced_inputs[filename] = {'names': draw_param_names, 'values': draw_param_values} + # If we want to iterate through map.txt, add the parameters to the forced inputs + if "ITERATE" in parser.config_dict['BACKGROUNDS']: + im_dir = parser.config_dict['BACKGROUNDS']["PATH"] + draw_param_names, draw_param_values = treat_map_like_user_dist(im_dir, max_size) + forced_inputs[im_dir + '/map.txt'] = {'names': draw_param_names, 'values': draw_param_values} # Overwrite the configuration dict with any forced values from user distribtuions force_param_inputs = _get_forced_sim_inputs(forced_inputs, dataset.configurations, dataset.bands) @@ -379,13 +391,6 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True, # Initialize the ImageGenerator ImGen = ImageGenerator(return_planes, solve_lens_equation) - # Handle image backgrounds if they exist - if len(parser.image_paths) > 0: - im_dir = parser.config_dict['BACKGROUNDS']["PATH"] - image_backgrounds = read_images(im_dir, parser.config_dict['IMAGE']['PARAMETERS']['numPix'], dataset.bands) - else: - image_backgrounds = np.zeros((len(dataset.bands), parser.config_dict['IMAGE']['PARAMETERS']['numPix'], parser.config_dict['IMAGE']['PARAMETERS']['numPix']))[np.newaxis,:] - # Clear the sim_dicts out of memory if not os.path.exists(dataset.outdir): os.system('mkdir ' + dataset.outdir) @@ -506,6 +511,17 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True, metadata_df = pd.DataFrame(metadata) del metadata + # Delete images with sentinel redshifts + redshift_cols = [x for x in metadata_df.columns if x.find('REDSHIFT') != -1] + plane_cols = {int(x[x.index('PLANE_') + 6: x.index('-', x.index('PLANE_') + 1)]) : x for x in redshift_cols} + greatest_plane_col = plane_cols[max(plane_cols.keys())] + mask = metadata_df[greatest_plane_col].values < 10 + configuration_images = configuration_images[mask] + metadata_df = metadata_df[mask].copy().reset_index(drop=True) + if return_planes: + 
configuration_planes = configuration_planes[mask] + + # Save the images and metadata to the outdir if desired (ideal for large simulation production) if save_to_disk: #Images diff --git a/deeplenstronomy/input_reader.py b/deeplenstronomy/input_reader.py index 1fab03c..273c21e 100644 --- a/deeplenstronomy/input_reader.py +++ b/deeplenstronomy/input_reader.py @@ -364,6 +364,23 @@ def _flatten_and_fill(self, config_dict, cosmo, inputs, objid=0): # Set the PLANE's redshift in the config_dict if k_param == 'REDSHIFT': + # Check if drawn redshift is less than a closer plane, and redraw if necessary + if plane_num >= 2: + prev_plane_num = plane_num - 1 + tries = 0 + while tries < 10: + if config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] >= draws[0]: + draws = self._draw(v_param['DISTRIBUTION'], bands) + tries += 1 + else: + break + else: + # Set a sentinel value to drop this system + if config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] >= 10: + draws = [config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] + 1] * len(bands) + else: + draws = [10] * len(bands) + config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = draws[0] for band, draw in zip(bands, draws): @@ -372,7 +389,20 @@ def _flatten_and_fill(self, config_dict, cosmo, inputs, objid=0): else: # Set the PLANE's redshift in the config_dict if k_param == 'REDSHIFT': - config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = v_param + # set the redshift to a sentinel value if it's less than a previous plane + if plane_num >= 2: + prev_plane_num = plane_num - 1 + if v_param < config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)]: + if config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] >= 10: + config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] + 1 + redshift = config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] + 
1 + else: + config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = 10 + redshift = 10 + + else: + config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = v_param + for band in bands: for obj_num in range(1, config_dict['SIM_DICT']['PLANE_{0}-NUMBER_OF_OBJECTS'.format(plane_num)] + 1): @@ -521,6 +551,22 @@ def _flatten_and_fill(self, config_dict, cosmo, inputs, objid=0): if param_name in output_dict[band]: output_dict[band][param_name] = inputs[(param_name, band)] + + # Protect against non-physical geometries by setting a sentinel value + if param_name.find('REDSHIFT') != -1: + plane_num = int(param_name.split('PLANE_')[1].split('-')[0]) + if plane_num >= 2: + for prev_plane_num in range(1, plane_num): + prev_plane_param_name = param_name.replace('PLANE_{0}'.format(plane_num), 'PLANE_{0}'.format(prev_plane_num)) + if output_dict[band][param_name] <= output_dict[band][prev_plane_param_name]: + if output_dict[band][prev_plane_param_name] >= 10: + new_redshift = output_dict[band][prev_plane_param_name] + 1 + else: + new_redshift = 10 + + for b in output_dict.keys(): + output_dict[b][param_name] = new_redshift + else: print("WARNING: " + param_name + " is not present in the simulated dataset and may produce unexpected behavior. Use dataset.search() to find all expected names") diff --git a/deeplenstronomy/utils.py b/deeplenstronomy/utils.py index 0814876..eea184f 100644 --- a/deeplenstronomy/utils.py +++ b/deeplenstronomy/utils.py @@ -131,9 +131,21 @@ def read_distribution_file(filename): assert 'WEIGHT' in df.columns, "'WEIGHT' must be a column in {}".format(filename) return df - -def draw_from_user_dist(filename, size, mode, step=10): + +def treat_map_like_user_dist(im_dir, size): + """Use the iterate mode of draw_from_user_dist for map.txt. 
+ + Args: + im_dir (str): name of directory containing map.txt + size (int): size of simulations + """ + df = pd.read_csv(im_dir + '/' + 'map.txt', delim_whitespace=True) + df['WEIGHT'] = 1.0 + return draw_from_user_dist("unused filename", size, 'iterate', df=df) + + +def draw_from_user_dist(filename, size, mode, step=10, df=None): """ Interpolate a user-specified N-dimensional probability distribution and sample from it. @@ -143,16 +155,18 @@ def draw_from_user_dist(filename, size, mode, step=10): size (int): the number of times to sample the probability distribution mode (str): choose from ['interpolate', 'sample'] step (int): the number of steps on the interpolation grid + df (pd.DataFrame): optional already read dataframe. Returns: parameters: list, the names of the paramters choices: array with entries as arrays of drawn parameters Raises: - NotImplementedError: if a mode other than "sample" or "interpolate" is passed + NotImplementedError: if a mode other than "sample" or "interpolate" or "iterate" is passed """ - df = read_distribution_file(filename) + if df is None: + df = read_distribution_file(filename) parameters = [x for x in df.columns if x != 'WEIGHT'] points = df[parameters].values @@ -184,11 +198,17 @@ def draw_from_user_dist(filename, size, mode, step=10): index_arr = np.random.choice(np.arange(len(points), dtype=int), size=size, p=weights / weights.sum()) choices = points[index_arr] + elif mode == 'iterate': + num_repeats = size // len(df) + 1 + index_arr = np.tile(np.arange(len(df)), num_repeats)[:size] + choices = points[index_arr] + else: raise NotImplementedError("unexpected mode passed, must be 'sample' or 'interpolate'") return parameters, choices + def read_images(im_dir, im_size, bands): """ Read images into memory and resize to match simulations. 
@@ -237,7 +257,7 @@ def read_images(im_dir, im_size, bands): return im_array -def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configuration): +def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configuration, overwrite=False): """ Sort image files based on map. If no map exists, sort randomly. @@ -246,6 +266,7 @@ def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configurat image_bank_size (int): number of images in user-specified bank config_dicts (List[dict]): list of config_dicts configuration (str): the configuration currently running + overwrite (bool): optionally overwrite sim_inputs instead of solving Returns: the indices of the images utilized for each config_dict @@ -277,13 +298,17 @@ def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configurat if len(bad_columns) != 0: - print(config_dicts[0].keys()) print("WARNING {0} are not found in the simulated dataset for {1}".format(', '.join(bad_columns), configuration) + ". You may see unexpected results. Use the dataset.search() function to find the correct column names.") if len(map_columns) == 0: # Sort randomly image_indices = np.random.choice(np.arange(image_bank_size), replace=True, size=len(config_dicts)) + + elif overwrite: + # Maintain order for iterative insertion. + num_repeats = len(config_dicts) // image_bank_size + 1 + image_indices = np.tile(np.arange(image_bank_size), num_repeats)[:len(config_dicts)] else: # Trim df to just the columns needed diff --git a/setup.py b/setup.py index ce2d513..39160ba 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ setup( name='deeplenstronomy', - version='0.0.2.0', + version='0.0.2.1', description='wrap lenstronomy for efficient simulation generation', long_description=long_description, long_description_content_type='text/markdown',