This commit contains the updates for version 0.0.2.1.
The main changes are as follows:

1. If sampling redshifts from USERDISTs or DISTRIBUTIONs produces an unphysical geometry (e.g., PLANE_2 having a REDSHIFT less than PLANE_1), the simulated system is dropped from the images and metadata.
2. The BACKGROUNDS section of the configuration file now accepts an ITERATE key whose value is unused (set it to None). In ITERATE mode, the background images are inserted into the simulations in order, cycling through the image bank so that every image gets used; a sketch of the configuration is shown below.
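A minimal sketch of the new key in the YAML configuration file (the directory name is illustrative; PATH points at the directory holding the background images and, for iterate mode, a map.txt):

    BACKGROUNDS:
        PATH: image_bank   # illustrative: directory with the background images and map.txt
        ITERATE: None      # value is ignored; the key's presence enables iterate mode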
Robert Morgan committed Nov 15, 2021
1 parent fd66ec0 commit 566fa61
Showing 5 changed files with 108 additions and 16 deletions.
5 changes: 5 additions & 0 deletions HISTORY.rst
@@ -3,6 +3,11 @@
History
-------

0.0.2.1 (2021-11-15)
+++++++++++++++++++++
* ITERATE option for image BACKGROUNDS
* Drop any systems with unphysical redshifts from simulations

0.0.2.0 (2021-05-07)
+++++++++++++++++++++
* Verified stability of all new time series features
32 changes: 24 additions & 8 deletions deeplenstronomy/deeplenstronomy.py
@@ -11,7 +11,7 @@

from deeplenstronomy.input_reader import Organizer, Parser
from deeplenstronomy.image_generator import ImageGenerator
from deeplenstronomy.utils import draw_from_user_dist, organize_image_backgrounds, read_images, check_background_indices
from deeplenstronomy.utils import draw_from_user_dist, organize_image_backgrounds, read_images, check_background_indices, treat_map_like_user_dist
from deeplenstronomy import surveys

class Dataset():
@@ -338,6 +338,13 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
# Store configurations
dataset.configurations = list(dataset.config_dict['GEOMETRY'].keys())

# Handle image backgrounds if they exist
if len(parser.image_paths) > 0:
im_dir = parser.config_dict['BACKGROUNDS']["PATH"]
image_backgrounds = read_images(im_dir, parser.config_dict['IMAGE']['PARAMETERS']['numPix'], dataset.bands)
else:
image_backgrounds = np.zeros((len(dataset.bands), parser.config_dict['IMAGE']['PARAMETERS']['numPix'], parser.config_dict['IMAGE']['PARAMETERS']['numPix']))[np.newaxis,:]

# If user-specified distributions exist, draw from them
forced_inputs = {}
max_size = dataset.size * 100 # maximum 100 epochs if timeseries
@@ -352,6 +359,11 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
draw_param_names, draw_param_values = draw_from_user_dist(filename, max_size, mode, step)
forced_inputs[filename] = {'names': draw_param_names, 'values': draw_param_values}

# If we want to iterate through map.txt, add the parameters to the forced inputs
if "ITERATE" in parser.config_dict['BACKGROUNDS']:
im_dir = parser.config_dict['BACKGROUNDS']["PATH"]
draw_param_names, draw_param_values = treat_map_like_user_dist(im_dir, max_size)
forced_inputs[im_dir + '/map.txt'] = {'names': draw_param_names, 'values': draw_param_values}

# Overwrite the configuration dict with any forced values from user distributions
force_param_inputs = _get_forced_sim_inputs(forced_inputs, dataset.configurations, dataset.bands)
@@ -379,13 +391,6 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
# Initialize the ImageGenerator
ImGen = ImageGenerator(return_planes, solve_lens_equation)

# Handle image backgrounds if they exist
if len(parser.image_paths) > 0:
im_dir = parser.config_dict['BACKGROUNDS']["PATH"]
image_backgrounds = read_images(im_dir, parser.config_dict['IMAGE']['PARAMETERS']['numPix'], dataset.bands)
else:
image_backgrounds = np.zeros((len(dataset.bands), parser.config_dict['IMAGE']['PARAMETERS']['numPix'], parser.config_dict['IMAGE']['PARAMETERS']['numPix']))[np.newaxis,:]

# Clear the sim_dicts out of memory
if not os.path.exists(dataset.outdir):
os.system('mkdir ' + dataset.outdir)
@@ -506,6 +511,17 @@ def make_dataset(config, dataset=None, save_to_disk=False, store_in_memory=True,
metadata_df = pd.DataFrame(metadata)
del metadata

# Delete images with sentinel redshifts
redshift_cols = [x for x in metadata_df.columns if x.find('REDSHIFT') != -1]
plane_cols = {int(x[x.index('PLANE_') + 6: x.index('-', x.index('PLANE_') + 1)]) : x for x in redshift_cols}
greatest_plane_col = plane_cols[max(plane_cols.keys())]
mask = metadata_df[greatest_plane_col].values < 10
configuration_images = configuration_images[mask]
metadata_df = metadata_df[mask].copy().reset_index(drop=True)
if return_planes:
configuration_planes = configuration_planes[mask]


# Save the images and metadata to the outdir if desired (ideal for large simulation production)
if save_to_disk:
#Images
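To illustrate the filtering step above: a sentinel redshift assigned to any plane propagates to every later plane (each later plane gets the previous value plus one), so checking only the most distant plane's column suffices. A standalone sketch with toy metadata, assuming the PLANE_N-REDSHIFT-<band> column convention seen in the diff:

    import numpy as np
    import pandas as pd

    # Toy metadata: the second system carries the sentinel redshift (>= 10)
    metadata_df = pd.DataFrame({
        'PLANE_1-REDSHIFT-g': [0.5, 0.4, 0.3],
        'PLANE_2-REDSHIFT-g': [1.2, 10.0, 0.9],
    })

    # Locate the redshift column of the most distant plane
    redshift_cols = [x for x in metadata_df.columns if 'REDSHIFT' in x]
    plane_cols = {int(x.split('PLANE_')[1].split('-')[0]): x for x in redshift_cols}
    greatest_plane_col = plane_cols[max(plane_cols)]

    # Keep only systems whose most distant plane has a physical (< 10) redshift
    mask = metadata_df[greatest_plane_col].values < 10
    metadata_df = metadata_df[mask].reset_index(drop=True)  # drops the flagged system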
48 changes: 47 additions & 1 deletion deeplenstronomy/input_reader.py
@@ -364,6 +364,23 @@ def _flatten_and_fill(self, config_dict, cosmo, inputs, objid=0):

# Set the PLANE's redshift in the config_dict
if k_param == 'REDSHIFT':
# Check if drawn redshift is less than a closer plane, and redraw if necessary
if plane_num >= 2:
prev_plane_num = plane_num - 1
tries = 0
while tries < 10:
if config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] >= draws[0]:
draws = self._draw(v_param['DISTRIBUTION'], bands)
tries += 1
else:
break
else:
# Set a sentinel value to drop this system
if config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] >= 10:
draws = [config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] + 1] * len(bands)
else:
draws = [10] * len(bands)

config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = draws[0]

for band, draw in zip(bands, draws):
@@ -372,7 +389,20 @@ def _flatten_and_fill(self, config_dict, cosmo, inputs, objid=0):
else:
# Set the PLANE's redshift in the config_dict
if k_param == 'REDSHIFT':
config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = v_param
# Set the redshift to a sentinel value if it's less than a previous plane's
if plane_num >= 2:
prev_plane_num = plane_num - 1
if v_param < config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)]:
if config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] >= 10:
config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] + 1
redshift = config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(prev_plane_num)] + 1
else:
config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = 10
redshift = 10

else:
config_dict['SIM_DICT']['PLANE_{0}-REDSHIFT'.format(plane_num)] = v_param


for band in bands:
for obj_num in range(1, config_dict['SIM_DICT']['PLANE_{0}-NUMBER_OF_OBJECTS'.format(plane_num)] + 1):
@@ -521,6 +551,22 @@ def _flatten_and_fill(self, config_dict, cosmo, inputs, objid=0):

if param_name in output_dict[band]:
output_dict[band][param_name] = inputs[(param_name, band)]

# Protect against non-physical geometries by setting a sentinel value
if param_name.find('REDSHIFT') != -1:
plane_num = int(param_name.split('PLANE_')[1].split('-')[0])
if plane_num >= 2:
for prev_plane_num in range(1, plane_num):
prev_plane_param_name = param_name.replace('PLANE_{0}'.format(plane_num), 'PLANE_{0}'.format(prev_plane_num))
if output_dict[band][param_name] <= output_dict[band][prev_plane_param_name]:
if output_dict[band][prev_plane_param_name] >= 10:
new_redshift = output_dict[band][prev_plane_param_name] + 1
else:
new_redshift = 10

for b in output_dict.keys():
output_dict[b][param_name] = new_redshift

else:
print("WARNING: " + param_name + " is not present in the simulated dataset and may produce unexpected behavior. Use dataset.search(<param name>) to find all expected names")

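The redraw logic added above retries a bounded number of times before falling back to the sentinel, and bumps the sentinel upward when the previous plane already carries one. A minimal standalone sketch of the pattern, where draw_redshift is a hypothetical stand-in for self._draw:

    import random

    def draw_redshift():
        # Hypothetical stand-in for self._draw(v_param['DISTRIBUTION'], bands)
        return [random.uniform(0.0, 2.0)]

    SENTINEL = 10
    prev_z = 0.8                     # redshift already assigned to the previous plane

    draws = draw_redshift()
    for _ in range(10):              # bounded number of redraws
        if draws[0] > prev_z:
            break                    # ordering is physical; keep this draw
        draws = draw_redshift()
    else:
        # Give up and flag the system for removal; if the previous plane already
        # holds the sentinel, use prev_z + 1 so the most distant plane keeps the flag
        draws = [prev_z + 1 if prev_z >= SENTINEL else SENTINEL]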
37 changes: 31 additions & 6 deletions deeplenstronomy/utils.py
@@ -131,9 +131,21 @@ def read_distribution_file(filename):
assert 'WEIGHT' in df.columns, "'WEIGHT' must be a column in {}".format(filename)

return df


def draw_from_user_dist(filename, size, mode, step=10):

def treat_map_like_user_dist(im_dir, size):
"""Use the iterate mode of draw_from_user_dist for map.txt.
Args:
im_dir (str): name of directory containing map.txt
size (int): the number of samples to draw
"""
df = pd.read_csv(im_dir + '/' + 'map.txt', delim_whitespace=True)
df['WEIGHT'] = 1.0
return draw_from_user_dist("unused filename", size, 'iterate', df=df)


def draw_from_user_dist(filename, size, mode, step=10, df=None):
"""
Interpolate a user-specified N-dimensional probability distribution and
sample from it.
@@ -143,16 +155,18 @@ def draw_from_user_dist(filename, size, mode, step=10):
size (int): the number of times to sample the probability distribution
mode (str): choose from ['interpolate', 'sample']
step (int): the number of steps on the interpolation grid
df (pd.DataFrame): optional pre-loaded dataframe; if supplied, filename is ignored.
Returns:
parameters: list, the names of the parameters
choices: array with entries as arrays of drawn parameters
Raises:
NotImplementedError: if a mode other than "sample" or "interpolate" is passed
NotImplementedError: if a mode other than "sample", "interpolate", or "iterate" is passed
"""

df = read_distribution_file(filename)
if df is None:
df = read_distribution_file(filename)

parameters = [x for x in df.columns if x != 'WEIGHT']
points = df[parameters].values
@@ -184,11 +198,17 @@
index_arr = np.random.choice(np.arange(len(points), dtype=int), size=size, p=weights / weights.sum())
choices = points[index_arr]

elif mode == 'iterate':
num_repeats = size // len(df) + 1
index_arr = np.tile(np.arange(len(df)), num_repeats)[:size]
choices = points[index_arr]

else:
raise NotImplementedError("unexpected mode passed, must be 'sample', 'interpolate', or 'iterate'")

return parameters, choices
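As a quick check of the iterate branch above, with a hypothetical 3-row map and 8 requested draws:

    import numpy as np

    size, n_rows = 8, 3
    index_arr = np.tile(np.arange(n_rows), size // n_rows + 1)[:size]
    # index_arr -> array([0, 1, 2, 0, 1, 2, 0, 1]): the bank is cycled in order,
    # so every image is used before any image repeats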


def read_images(im_dir, im_size, bands):
"""
Read images into memory and resize to match simulations.
@@ -237,7 +257,7 @@

return im_array

def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configuration):
def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configuration, overwrite=False):
"""
Sort image files based on map. If no map exists, sort randomly.
@@ -246,6 +266,7 @@ def organize_image_backgrounds(im_dir, image_bank_size, config_dicts, configuration,
image_bank_size (int): number of images in user-specified bank
config_dicts (List[dict]): list of config_dicts
configuration (str): the configuration currently running
overwrite (bool): if True, assign background images to systems in order (tiled through the bank) to preserve the ITERATE ordering, instead of matching on map columns
Returns:
the indices of the images utilized for each config_dict
@@ -277,13 +298,17 @@


if len(bad_columns) != 0:
print(config_dicts[0].keys())
print("WARNING {0} are not found in the simulated dataset for {1}".format(', '.join(bad_columns), configuration) +
". You may see unexpected results. Use the dataset.search(<param_name>) function to find the correct column names.")

if len(map_columns) == 0:
# Sort randomly
image_indices = np.random.choice(np.arange(image_bank_size), replace=True, size=len(config_dicts))

elif overwrite:
# Maintain order for iterative insertion.
num_repeats = len(config_dicts) // image_bank_size + 1
image_indices = np.tile(np.arange(image_bank_size), num_repeats)[:len(config_dicts)]

else:
# Trim df to just the columns needed
2 changes: 1 addition & 1 deletion setup.py
@@ -29,7 +29,7 @@

setup(
name='deeplenstronomy',
version='0.0.2.0',
version='0.0.2.1',
description='wrap lenstronomy for efficient simulation generation',
long_description=long_description,
long_description_content_type='text/markdown',
