diff --git a/README.rst b/README.rst index 1d8c504e..bf76e6a4 100644 --- a/README.rst +++ b/README.rst @@ -9,7 +9,7 @@ Neurodamus Neurodamus is a BBP Simulation Control application for Neuron. -The Python implementation offers a comprehensive Python API for fine tunning of the simulation, initially defined by a BlueConfig file. +The Python implementation offers a comprehensive Python API for fine tuning of the simulation, initially defined by a BlueConfig file. Description @@ -81,7 +81,7 @@ An example of a full installation with a simulation run can be found in the work Docker container ================ -Alternaltively, you can start directly a neurodamus docker container where all the packages are built. +Alternatively, you can start directly a neurodamus docker container where all the packages are built. With the container, you can build your mod files and run simulations. See instructions in `docker/README.md `_. diff --git a/docs/architecture.rst b/docs/architecture.rst index ef284ece..a8d7e9ef 100644 --- a/docs/architecture.rst +++ b/docs/architecture.rst @@ -317,6 +317,57 @@ Indeed public API represents exactly these 3 cases: cell_manager.finalize() conn_manager.create_connections() +Dry Run +------- + +A dry run mode was introduced to help users in understanding how many nodes and tasks are +necessary to run a specific circuit. In the future this mode will also be used to improve +load balancing. + +By running a dry run, using the `--dry-run` flag, the user will NOT run an actual simulation but +will get a summary of the estimated memory used for cells and synapses, including also the overhead +memory necessary to load libraries and neurodamus data structures. +A grand total is provided to the user as well as a per-cell type and per-synapse type breakdown. + +In this paragraph we will go a bit more into details on how the estimation is done. + +Below you can see the workflow of the dry run mode: + +.. 
image:: ./img/neurodamus_dry_run.png + +First of all, since memory usage of cells is strongly connected to their metypes, we create a dictionary +of all the gids corresponding to a certain metype combination. This dictionary is then crosschecked +with the one imported from the external `memory_usage.json` file, which contains the memory usage +of metype combinations coming from a previous execution of dry run on this or any other circuits. +As long as the `memory_usage.json` file is present in the working directory, it will be loaded. + +If the metype combination is not present in the external file, we compute the memory usage of the +metype combination by instantiating a group of (maximum) 50 cells per metype combination and then +measuring memory usage before and after the instantiation. The memory usage is then averaged over +the number of cells instantiated and the results are saved internally and added to the external +`memory_usage.json` file. Any combination already present in the external file is simply imported +and is not instantiated again in order to speed up the execution. One can simply delete the `memory_usage.json` +file (or any relevant lines) in order to force the re-evaluation of all (or some) metype +combinations. + +The memory usage of synapses is instead estimated using a pre-computed look up table, which is +hardcoded in the `SynapseMemoryUsage` class. The values used for this look up table were computed by using an external script +to instantiate 1M synapses of each type, each with 1K connections, and then measuring the memory +usage before and after the instantiation. The memory usage is then averaged over the number of +synapses instantiated. The script used to perform this operation `synstat.py` is available for the user +and is archived in this repo in the `_benchmarks` folder. + +Having these pre-computed values allows us to simply count the amount of synapses of each type +and multiply it by the corresponding memory usage value. 
+ +Apart from both cells and synapses, we also need to take into account the memory usage of neurodamus +itself, e.g. data structures, loaded libraries and so on. This is done by measuring the RSS of the neurodamus +process before any of the actual instantiation is done. This value, since it's averaged over all ranks that take +part in the execution, is then multiplied by the number of ranks used in the execution. + +The final result is then printed to the user in a human readable format. + + Development ------------ diff --git a/docs/examples.rst b/docs/examples.rst index 8f1ab7b7..5fe5ba50 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -87,21 +87,28 @@ In order to obtain a more accurate estimation of the resources needed for a simu users can also run Neurodamus in dry run mode. This functionality is only available for libsonata circuits. MVD3 circuits are not supported. -This mode will instantiate all the cells but won't run the actual simulation. -The user can then check the memory usage of the simulation as it's printed on -the terminal and decide how to proceed. - -The mode also provides detailed information on the memory usage of each cell type -and the total memory usage of the simulation. +This mode will partially instantiate cells and synapses to get a statistical overview +of the memory used but won't run the actual simulation. +The user can then check the estimated memory usage of the simulation as it's printed on +the terminal at the end of the execution. In a future update we will also integrate +indications and suggestions on the number of tasks and nodes to use for that circuit +based on the amount of memory used during the dry run. + +The mode also provides detailed information on the memory usage of each cell metype, +synapse type and the total estimated memory usage of the simulation, including the +memory overhead dictated by loading of libraries and data structures. 
+ +The information on the cell memory usage is also automatically saved in a file called +``memory_usage.json`` in the working directory. This json file contains a +dictionary with the memory usage of each cell metype in the circuit and is automatically +loaded in any further execution of Neurodamus in dry run mode, in order to speed up the execution. +In the future we plan to also use this file to improve the load balance of actual simulations. To run Neurodamus in dry run mode, the user can use the ``--dry-run`` flag when launching Neurodamus. For example: ``neurodamus --configFile=BlueConfig --dry-run`` -At the moment dry run mode only supports memory estimation for cell instantiation. Evaluation -of other resources (e.g. connections) will be added in the future. - Neurodamus for Developers ------------------------- diff --git a/docs/img/neurodamus_dry_run.png b/docs/img/neurodamus_dry_run.png new file mode 100644 index 00000000..43105234 Binary files /dev/null and b/docs/img/neurodamus_dry_run.png differ diff --git a/neurodamus/cell_distributor.py b/neurodamus/cell_distributor.py index 7c33f95d..89e01c1f 100644 --- a/neurodamus/cell_distributor.py +++ b/neurodamus/cell_distributor.py @@ -142,6 +142,7 @@ def __init__(self, circuit_conf, target_manager, _run_conf=None, **_kw): self._binfo = None self._pc = Nd.pc self._conn_managers_per_src_pop = weakref.WeakValueDictionary() + self._metype_counts = None if type(circuit_conf.CircuitPath) is str: self._init_config(circuit_conf, self._target_spec.population or '') @@ -163,6 +164,7 @@ def __init__(self, circuit_conf, target_manager, _run_conf=None, **_kw): is_default = property(lambda self: self._circuit_name is None) is_virtual = property(lambda self: False) connection_managers = property(lambda self: self._conn_managers_per_src_pop) + metype_counts = property(lambda self: self._metype_counts) def is_initialized(self): return self._local_nodes is not None @@ -219,6 +221,8 @@ def load_nodes(self, load_balancer=None, *, 
_loader=None, loader_opts=None): else: gidvec, me_infos, *cell_counts = self._load_nodes_balance(loader_f, load_balancer) self._local_nodes.add_gids(gidvec, me_infos) + if SimConfig.dry_run: + self._metype_counts = me_infos.counts self._total_cells = cell_counts[0] logging.info(" => Loaded info about %d target cells (out of %d)", *cell_counts) @@ -253,7 +257,7 @@ def _load_nodes_balance(self, loader_f, load_balancer): return gidvec, me_infos, total_cells, full_size # - - def finalize(self, *_): + def finalize(self, imported_memory_dict=None, *_): """Instantiates cells and initializes the network in the simulator. Note: it should be called after all cell distributors have done load_nodes() @@ -262,11 +266,13 @@ def finalize(self, *_): if self._local_nodes is None: return logging.info("Finalizing cells... Gid offset: %d", self._local_nodes.offset) - self._instantiate_cells() + memory_dict = self._instantiate_cells(imported_memory_dict) self._update_targets_local_gids() self._init_cell_network() self._local_nodes.clear_cell_info() + return memory_dict + @mpi_no_errors def _instantiate_cells(self, _CellType=None): CellType = _CellType or self.CellType @@ -286,52 +292,69 @@ def _instantiate_cells(self, _CellType=None): self._store_cell(gid + cell_offset, cell) @mpi_no_errors - def _instantiate_cells_dry(self, _CellType=None): + def _instantiate_cells_dry(self, _CellType=None, imported_memory_dict=None): CellType = _CellType or self.CellType assert CellType is not None, "Undefined CellType in Manager" Nd.execute("xopen_broadcast_ = 0") logging.info(" > Dry run on cells... 
(%d in Rank 0)", len(self._local_nodes)) - logging.info("Memory usage for metype combinations:") + logging.info("Memory usage for newly instantiated metype combinations:") cell_offset = self._local_nodes.offset gid_info_items = self._local_nodes.items() - prev_emodel = None + prev_etype = None prev_mtype = None start_memory = get_mem_usage() n_cells = 0 memory_dict = {} - for gid, cell_info in gid_info_items: + filtered_gid_info_items = self._filter_memory_dict(imported_memory_dict, gid_info_items) + + for gid, cell_info in filtered_gid_info_items: diff_mtype = prev_mtype != cell_info.mtype - diff_emodel = prev_emodel != cell_info.emodel - first = prev_emodel is None and prev_mtype is None - if (diff_mtype or diff_emodel) and not first: + diff_etype = prev_etype != cell_info.etype + first = prev_etype is None and prev_mtype is None + if (diff_mtype or diff_etype) and not first: end_memory = get_mem_usage() memory_allocated = end_memory - start_memory - log_all(logging.INFO, " * %s %s: %f MB averaged over %d cells", - prev_emodel, prev_mtype, memory_allocated/n_cells, n_cells) - memory_dict[(prev_emodel, prev_mtype)] = memory_allocated/n_cells + log_all(logging.INFO, " * %s %s: %.2f MB averaged over %d cells", + prev_etype, prev_mtype, memory_allocated/n_cells, n_cells) + memory_dict[(prev_etype, prev_mtype)] = memory_allocated/n_cells start_memory = end_memory n_cells = 0 cell = CellType(gid, cell_info, self._circuit_conf) self._store_cell(gid + cell_offset, cell) - prev_emodel = cell_info.emodel + prev_etype = cell_info.etype prev_mtype = cell_info.mtype n_cells += 1 - if prev_emodel is not None and prev_mtype is not None: + if prev_etype is not None and prev_mtype is not None: end_memory = get_mem_usage() memory_allocated = end_memory - start_memory log_all(logging.INFO, " * %s %s: %f MB averaged over %d cells", - prev_emodel, prev_mtype, memory_allocated/n_cells, n_cells) - memory_dict[(prev_emodel, prev_mtype)] = memory_allocated/n_cells + prev_etype, 
prev_mtype, memory_allocated/n_cells, n_cells) + memory_dict[(prev_etype, prev_mtype)] = memory_allocated/n_cells + + if imported_memory_dict is not None: + memory_dict.update(imported_memory_dict) return memory_dict + def _filter_memory_dict(self, imported_memory_dict, gid_info_items): + if imported_memory_dict is not None: + filtered_gid_info_items = ( + (gid, cell_info) + for gid, cell_info in gid_info_items + if (cell_info.etype, cell_info.mtype) not in imported_memory_dict + ) + else: + filtered_gid_info_items = gid_info_items + + return filtered_gid_info_items + def _update_targets_local_gids(self): logging.info(" > Updating targets") cell_offset = self._local_nodes.offset @@ -559,7 +582,7 @@ def load_nodes(self, load_balancer=None, **kw): log_verbose("Nodes Format: %s, Loader: %s", self._node_format, loader.__name__) return super().load_nodes(load_balancer, _loader=loader, loader_opts=loader_opts) - def _instantiate_cells(self, *_): + def _instantiate_cells(self, imported_memory_dict, *_): if self.CellType is not NotImplemented: return super()._instantiate_cells(self.CellType) conf = self._circuit_conf @@ -570,7 +593,7 @@ def _instantiate_cells(self, *_): log_verbose("Loading '%s' morphologies from: %s", CellType.morpho_extension, conf.MorphologyPath) if SimConfig.dry_run: - super()._instantiate_cells_dry(CellType) + return super()._instantiate_cells_dry(CellType, imported_memory_dict) else: super()._instantiate_cells(CellType) diff --git a/neurodamus/core/nodeset.py b/neurodamus/core/nodeset.py index 48dc9153..c80ba5bb 100644 --- a/neurodamus/core/nodeset.py +++ b/neurodamus/core/nodeset.py @@ -313,7 +313,7 @@ def intersection(self, other: _NodeSetBase, raw_gids=False, _quick_check=False): # Like that we could still keep ranges internally and have PROPER API to get raw ids return numpy.add(intersect, 1, dtype=intersect.dtype) return numpy.add(intersect, self.offset + 1, dtype=intersect.dtype) - return [] + return numpy.array([], dtype="uint32") def 
intersects(self, other): return self.intersection(other, _quick_check=True) diff --git a/neurodamus/io/cell_readers.py b/neurodamus/io/cell_readers.py index 745c9e36..c0704111 100644 --- a/neurodamus/io/cell_readers.py +++ b/neurodamus/io/cell_readers.py @@ -248,6 +248,8 @@ def fetch_MEinfo(node_reader, gidvec, combo_file, meinfo): mtypes = node_reader.mtypes(indexes) emodels = node_reader.emodels(indexes) \ if combo_file else None # Rare but we may not need emodels (ngv) + etypes = node_reader.etypes(indexes) \ + if combo_file else None exc_mini_freqs = node_reader.exc_mini_frequencies(indexes) \ if node_reader.hasMiniFrequencies() else None inh_mini_freqs = node_reader.inh_mini_frequencies(indexes) \ @@ -259,8 +261,8 @@ def fetch_MEinfo(node_reader, gidvec, combo_file, meinfo): positions = node_reader.positions(indexes) rotations = node_reader.rotations(indexes) if node_reader.rotated else None - meinfo.load_infoNP(gidvec, morpho_names, emodels, mtypes, threshold_currents, holding_currents, - exc_mini_freqs, inh_mini_freqs, positions, rotations) + meinfo.load_infoNP(gidvec, morpho_names, emodels, mtypes, etypes, threshold_currents, + holding_currents, exc_mini_freqs, inh_mini_freqs, positions, rotations) def load_sonata(circuit_conf, all_gids, stride=1, stride_offset=0, *, @@ -279,7 +281,7 @@ def load_nodes_base_info(): total_cells = node_pop.size if SimConfig.dry_run: logging.info("Sonata dry run mode: looking for unique metype instances") - gid_metype_bundle = _retrieve_unique_metypes(node_pop, all_gids) + gid_metype_bundle, count_per_metype = _retrieve_unique_metypes(node_pop, all_gids) gidvec = dry_run_distribution(gid_metype_bundle, stride, stride_offset, total_cells) else: gidvec = split_round_robin(all_gids, stride, stride_offset, total_cells) @@ -289,8 +291,13 @@ def load_nodes_base_info(): node_sel = libsonata.Selection(gidvec - 1) # 0-based node indices morpho_names = node_pop.get_attribute("morphology", node_sel) mtypes = 
node_pop.get_attribute("mtype", node_sel) - emodels = [emodel.removeprefix("hoc:") - for emodel in node_pop.get_attribute("model_template", node_sel)] + try: + etypes = node_pop.get_attribute("etype", node_sel) + except libsonata.SonataError: + logging.warning("etype not found in node population, setting to None") + etypes = None + _model_templates = node_pop.get_attribute("model_template", node_sel) + emodel_templates = [emodel.removeprefix("hoc:") for emodel in _model_templates] if set(["exc_mini_frequency", "inh_mini_frequency"]).issubset(attr_names): exc_mini_freqs = node_pop.get_attribute("exc_mini_frequency", node_sel) inh_mini_freqs = node_pop.get_attribute("inh_mini_frequency", node_sel) @@ -309,13 +316,17 @@ def load_nodes_base_info(): rotations = _get_rotations(node_pop, node_sel) # For Sonata and new emodel hoc template, we need additional attributes for building metype + # TODO: validate it's really the emodel_templates var we should pass here, or etype add_params_list = None if not has_extra_data \ - else _getNeededAttributes(node_pop, circuit_conf.METypePath, emodels, gidvec-1) + else _getNeededAttributes(node_pop, circuit_conf.METypePath, emodel_templates, gidvec-1) meinfos = METypeManager() - meinfos.load_infoNP(gidvec, morpho_names, emodels, mtypes, threshold_currents, - holding_currents, exc_mini_freqs, inh_mini_freqs, positions, - rotations, add_params_list) + meinfos.load_infoNP(gidvec, morpho_names, emodel_templates, mtypes, etypes, + threshold_currents, holding_currents, + exc_mini_freqs, inh_mini_freqs, positions, rotations, + add_params_list) + if SimConfig.dry_run: + meinfos.counts = count_per_metype return gidvec, meinfos, total_cells # If dynamic properties are not specified simply return early @@ -480,8 +491,10 @@ def _retrieve_unique_metypes(node_reader, all_gids) -> dict: raise Exception(f"Reader type {type(node_reader)} incompatible with dry run.") unique_metypes = defaultdict(list) + count_per_metype = defaultdict(int) for gid, 
emodel, mtype in zip(gidvec, emodels, mtypes): unique_metypes[(emodel, mtype)].append(gid) + count_per_metype[(emodel, mtype)] += 1 logging.info("Out of %d cells, found %d unique mtype+emodel combination", len(gidvec), len(unique_metypes)) @@ -498,4 +511,4 @@ def _retrieve_unique_metypes(node_reader, all_gids) -> dict: else: gid_metype_bundle.append(unique_metypes[key]) - return gid_metype_bundle + return gid_metype_bundle, count_per_metype diff --git a/neurodamus/metype.py b/neurodamus/metype.py index 6c2ccc10..a4faf71f 100644 --- a/neurodamus/metype.py +++ b/neurodamus/metype.py @@ -144,7 +144,7 @@ def __init__(self, gid, meinfo, circuit_conf): mepath = circuit_conf.METypePath morpho_path = circuit_conf.MorphologyPath detailed_axon = circuit_conf.DetailedAxon - super().__init__(gid, mepath, meinfo.emodel, morpho_path, meinfo, detailed_axon) + super().__init__(gid, mepath, meinfo.emodel_tpl, morpho_path, meinfo, detailed_axon) def _instantiate_cell(self, gid, etype_path, emodel, morpho_path, meinfos_v6, detailed_axon): """Instantiates a SSCx v6 cell @@ -194,7 +194,7 @@ def __init__(self, gid, meinfo, circuit_conf): mepath = circuit_conf.METypePath morpho_path = circuit_conf.MorphologyPath if isinstance(meinfo, METypeItem): - meinfo = meinfo.emodel # Compat with loading V5 cells from Sonata Nodes + meinfo = meinfo.emodel_tpl # Compat with loading V5 cells from Sonata Nodes melabel = self._load_template(meinfo, mepath) super().__init__(gid, mepath, melabel, morpho_path) @@ -278,13 +278,13 @@ def __init__(self, gid, cell): class METypeItem(object): """ Metadata about an METype, each possibly used by several cells. 
""" - __slots__ = ("morph_name", "layer", "fullmtype", "etype", "emodel", "combo_name", + __slots__ = ("morph_name", "layer", "fullmtype", "etype", "emodel_tpl", "combo_name", "mtype", "threshold_current", "holding_current", "exc_mini_frequency", "inh_mini_frequency", "add_params", "local_to_global_matrix", "extra_attrs") - def __init__(self, morph_name, layer=None, fullmtype=None, etype=None, emodel=None, + def __init__(self, morph_name, layer=None, fullmtype=None, etype=None, emodel_tpl=None, combo_name=None, mtype=None, threshold_current=0, holding_current=0, exc_mini_frequency=0, inh_mini_frequency=0, add_params=None, position=None, rotation=None, scale=1.0): @@ -292,7 +292,7 @@ def __init__(self, morph_name, layer=None, fullmtype=None, etype=None, emodel=No self.layer = layer self.fullmtype = fullmtype self.etype = etype - self.emodel = emodel + self.emodel_tpl = emodel_tpl self.combo_name = combo_name self.mtype = mtype self.threshold_current = float(threshold_current) @@ -341,12 +341,15 @@ class METypeManager(dict): """ Map to hold specific METype info and provide retrieval by gid """ + counts = None + """Store counts of cells per METype""" + def insert(self, gid, morph_name, *me_data, **kwargs): """Function to add an METypeItem to internal data structure """ self[int(gid)] = METypeItem(morph_name, *me_data, **kwargs) - def load_infoNP(self, gidvec, morph_list, emodels, mtypes, + def load_infoNP(self, gidvec, morph_list, model_templates, mtypes, etypes, threshold_currents=None, holding_currents=None, exc_mini_freqs=None, inh_mini_freqs=None, positions=None, rotations=None, @@ -364,7 +367,8 @@ def load_infoNP(self, gidvec, morph_list, emodels, mtypes, add_params = add_params_list[idx] if add_params_list is not None else None self[int(gid)] = METypeItem( morph_list[idx], - emodel=emodels and emodels[idx], + etype=etypes[idx] if etypes is not None else None, + emodel_tpl=model_templates and model_templates[idx], mtype=mtype, # TODO: check this 
threshold_current=th_current, holding_current=hd_current, diff --git a/neurodamus/node.py b/neurodamus/node.py index 98368239..2ac3dd10 100644 --- a/neurodamus/node.py +++ b/neurodamus/node.py @@ -33,7 +33,8 @@ from .utils import compat from .utils.logging import log_stage, log_verbose, log_all from .utils.memory import trim_memory, pool_shrink, free_event_queues, print_mem_usage -from .utils.memory import SynapseMemoryUsage +from .utils.memory import SynapseMemoryUsage, export_memory_usage_to_json, get_task_level_mem_usage +from .utils.memory import import_memory_usage_from_json, pretty_printing_memory_mb from .utils.timeit import TimerManager, timeit from .core.coreneuron_configuration import CoreConfig # Internal Plugins @@ -418,6 +419,11 @@ def create_cells(self, load_balance=None): """Instantiate and distributes the cells of the network. Any targets will be updated to know which cells are local to the cpu. """ + if SimConfig.dry_run: + logging.info("Memory usage after initialization:") + print_mem_usage() + _, _, self.avg_tasks_usage_mb, _ = get_task_level_mem_usage() + # We wont go further if ProspectiveHosts is defined to some other cpu count prosp_hosts = self._run_conf.get("ProspectiveHosts") if load_balance and prosp_hosts not in (None, MPI.size): @@ -467,14 +473,60 @@ def create_cells(self, load_balance=None): # Let the cell managers have any final say in the cell objects log_stage("FINALIZING CIRCUIT CELLS") + + if ospath.exists("memory_usage.json") and SimConfig.dry_run: + logging.info("Loading memory usage from memory_usage.json...") + imported_memory_dict = import_memory_usage_from_json("memory_usage.json") + else: + imported_memory_dict = None + for cell_manager in self._circuits.all_node_managers(): log_stage("Circuit %s", cell_manager.circuit_name or "(default)") - cell_manager.finalize() + if SimConfig.dry_run and cell_manager.circuit_name is None: + logging.warning("Dry-run ignoring empty circuit...") + continue + memory_dict = 
cell_manager.finalize(imported_memory_dict) + metype_counts = cell_manager.metype_counts + if SimConfig.dry_run: + if memory_dict is None: + memory_dict = {} + self.full_mem_dict = self._collect_cell_counts(memory_dict) + self.cells_total_memory = self._calc_full_mem_estimate(self.full_mem_dict, + metype_counts) # Final bits after we have all cell managers self._circuits.global_manager.finalize() SimConfig.update_connection_blocks(self._circuits.alias) + @staticmethod + def _collect_cell_counts(memory_dict): + mem_dict_list = [memory_dict] + [None] * (MPI.size - 1) # send to rank0 + full_mem_list = MPI.py_alltoall(mem_dict_list) + if MPI.rank == 0: + full_mem_dict = {} + for mem_dict in full_mem_list: + full_mem_dict.update(mem_dict) + return full_mem_dict + + @staticmethod + def _calc_full_mem_estimate(full_mem_dict, metype_counts): + + memory_total = 0 + + if MPI.rank == 0: + export_memory_usage_to_json(full_mem_dict, "memory_usage.json") + logging.debug("Memory usage:") + for metype, mem in full_mem_dict.items(): + logging.debug(" %s: %f", metype, mem) + logging.debug("Number of cells per METype combination:") + if metype_counts is not None: + for metype, count in metype_counts.items(): + memory_total += count * full_mem_dict[metype] + logging.debug(" %s: %d", metype, count) + logging.info(" Total memory usage for cells: %.2f MB", memory_total) + + return memory_total + # - @mpi_no_errors @timeit(name="Synapse creation") @@ -502,7 +554,13 @@ def create_synapses(self): self._load_projections(pname, projection) if SimConfig.dry_run: - self._collect_display_syn_counts(synapse_counter) + self.syn_total_memory = self._collect_display_syn_counts(synapse_counter) + total_memory_overhead = self.avg_tasks_usage_mb*MPI.size + if MPI.rank == 0: + total_memory = self.cells_total_memory + self.syn_total_memory/1024 \ + + total_memory_overhead + logging.info("Total estimated memory usage for overhead + synapses + cells: %s", + pretty_printing_memory_mb(total_memory)) return 
log_stage("Configuring connections...") @@ -658,11 +716,14 @@ def _collect_display_syn_counts(local_syn_counter): inh += count if synapse_type >= 100: exc += count - logging.info(" - Estimated synapse memory usage (KB):") + logging.info(" - Estimated synapse memory usage (MB):") in_mem = SynapseMemoryUsage.get_memory_usage(inh, "ProbGABAAB") ex_mem = SynapseMemoryUsage.get_memory_usage(exc, "ProbAMPANMDA") - logging.info(f" - Inhibitory: {in_mem}") - logging.info(f" - Excitatory: {ex_mem}") + logging.info(f" - Inhibitory: {in_mem/1024:.2f}") + logging.info(f" - Excitatory: {ex_mem/1024:.2f}") + logging.info(f" - Total: {(in_mem + ex_mem)/1024:.2f}") + + return in_mem + ex_mem # - @mpi_no_errors @@ -1658,7 +1719,7 @@ def cleanup(self): if not SimConfig.use_coreneuron or SimConfig.simulate_model is False: self.clear_model(avoid_creating_objs=True) - if SimConfig.delete_corenrn_data: + if SimConfig.delete_corenrn_data and not SimConfig.dry_run: data_folder = SimConfig.coreneuron_datadir logging.info("Deleting intermediate data in %s", data_folder) diff --git a/neurodamus/target_manager.py b/neurodamus/target_manager.py index fa916dc6..a57dac40 100644 --- a/neurodamus/target_manager.py +++ b/neurodamus/target_manager.py @@ -526,7 +526,7 @@ def pop_gid_intersect(nodeset: _NodeSetBase, raw_gids=False): return [] if raw_gids: - assert len(self.nodesets) != 1, "Multiple populations when asking for raw gids" + assert len(self.nodesets) == 1, "Multiple populations when asking for raw gids" return pop_gid_intersect(self.nodesets[0], raw_gids=True) # If target is named Mosaic, basically we don't filter and use local_gids diff --git a/neurodamus/utils/memory.py b/neurodamus/utils/memory.py index d414b771..28e39386 100644 --- a/neurodamus/utils/memory.py +++ b/neurodamus/utils/memory.py @@ -5,6 +5,7 @@ import ctypes.util import math import os +import json from ..core import MPI, NeurodamusCore as Nd @@ -97,8 +98,8 @@ def print_node_level_mem_usage(): ) -def 
print_task_level_mem_usage(): - """Print statistics of the memory usage per MPI task.""" +def get_task_level_mem_usage(): + """Return statistics of the memory usage per MPI task.""" usage_mb = get_mem_usage() min_usage_mb = MPI.pc.allreduce(usage_mb, MPI.MIN) @@ -107,6 +108,14 @@ def print_task_level_mem_usage(): dev_usage_mb = math.sqrt(MPI.pc.allreduce((usage_mb - avg_usage_mb) ** 2, MPI.SUM) / MPI.size) + return min_usage_mb, max_usage_mb, avg_usage_mb, dev_usage_mb + + +def print_task_level_mem_usage(): + """Print statistics of the memory usage per MPI task.""" + + min_usage_mb, max_usage_mb, avg_usage_mb, dev_usage_mb = get_task_level_mem_usage() + logging.info( "Memusage (RSS) per task [MB]: Max=%.2lf, Min=%.2lf, Mean(Stdev)=%.2lf(%.2lf)", max_usage_mb, @@ -136,6 +145,21 @@ def get_mem_usage(): return usage_mb +def pretty_printing_memory_mb(memory_mb): + """ + A simple function that given a memory usage in MB + returns a string with the most appropriate unit. + """ + if memory_mb < 1024: + return "%.2lf MB" % memory_mb + elif memory_mb < 1024 ** 2: + return "%.2lf GB" % (memory_mb / 1024) + elif memory_mb < 1024 ** 3: + return "%.2lf TB" % (memory_mb / 1024 ** 2) + else: + return "%.2lf PB" % (memory_mb / 1024 ** 3) + + class SynapseMemoryUsage: ''' A small class that works as a lookup table for the memory used by each type of synapse. 
@@ -151,3 +175,17 @@ class SynapseMemoryUsage: @classmethod def get_memory_usage(cls, count, synapse_type): return count * cls._synapse_memory_usage[synapse_type] + + +def export_memory_usage_to_json(memory_usage_dict, json_file_name): + # serialize dictionary keys since dump wont accept tuples as keys + memory_usage_dict = {str(k): v for k, v in memory_usage_dict.items()} + with open(json_file_name, 'w') as fp: + json.dump(memory_usage_dict, fp, sort_keys=True, indent=4) + + +def import_memory_usage_from_json(json_file_name): + with open(json_file_name, 'r') as fp: + memory_usage_dict = json.load(fp) + memory_usage_dict = {eval(k): v for k, v in memory_usage_dict.items()} + return memory_usage_dict diff --git a/tests/test_dry_run.py b/tests/test_dry_run.py index 790c5f9b..1ab45cef 100644 --- a/tests/test_dry_run.py +++ b/tests/test_dry_run.py @@ -40,15 +40,17 @@ def test_retrieve_unique_metypes(): # Call the function with unittest.mock.patch('neurodamus.io.cell_readers.isinstance', return_value=True): - result = _retrieve_unique_metypes(node_reader, all_gids) + result_list, result_int = _retrieve_unique_metypes(node_reader, all_gids) # Assertion checks - assert isinstance(result, list) - assert all(isinstance(lst, list) for lst in result) + assert isinstance(result_list, list) + assert all(isinstance(lst, list) for lst in result_list) # Check the expected output based on the test inputs - expected_result = [[1, 3, 5], [2, 4]] - assert result == expected_result + expected_result_list = [[1, 3, 5], [2, 4]] + assert result_list == expected_result_list + expected_result_int = {('emodel1', 'mtype1'): 3, ('emodel2', 'mtype2'): 2} + assert result_int == expected_result_int class DummyNodeReader: