From e77867a2cdcb1d48bd8b8d74d68cfb168dddd4f0 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 21 Mar 2024 14:33:20 +0100 Subject: [PATCH 01/18] Update expand benchmarks and output the SD to separate folder. --- benchmark/.gitignore | 6 ++++++ benchmark/bench_sd_expand_bfs.py | 14 +++++++++++--- benchmark/bench_sd_expand_dfs.py | 14 +++++++++++--- benchmark/bench_sd_expand_min.py | 16 ++++++++++++---- benchmark/bench_sd_expand_scc.py | 19 +++++++++++++------ 5 files changed, 53 insertions(+), 16 deletions(-) create mode 100644 benchmark/.gitignore diff --git a/benchmark/.gitignore b/benchmark/.gitignore new file mode 100644 index 0000000..55f654d --- /dev/null +++ b/benchmark/.gitignore @@ -0,0 +1,6 @@ + +# Ignore benchmark runs. +_run_* + +# Ignore generated succession diagram files. +_sd_expand_* \ No newline at end of file diff --git a/benchmark/bench_sd_expand_bfs.py b/benchmark/bench_sd_expand_bfs.py index 1f2681e..0fc3d2a 100644 --- a/benchmark/bench_sd_expand_bfs.py +++ b/benchmark/bench_sd_expand_bfs.py @@ -1,10 +1,12 @@ from biodivine_aeon import BooleanNetwork -from biobalm.SuccessionDiagram import SuccessionDiagram +from biobalm import SuccessionDiagram import sys -import biobalm.SuccessionDiagram +import os +import pickle +import biobalm # Print progress and succession diagram size. 
-biobalm.SuccessionDiagram.DEBUG = True +biobalm.succession_diagram.DEBUG = True NODE_LIMIT = 1_000_000 DEPTH_LIMIT = 10_000 @@ -17,6 +19,12 @@ fully_expanded = sd.expand_bfs(bfs_level_limit=DEPTH_LIMIT, size_limit=NODE_LIMIT) assert fully_expanded +model_name = os.path.basename(sys.argv[1]) +sd_name = os.path.splitext(model_name)[0] + '.pickle' +os.makedirs('./_sd_expand_bfs') +with open(f'./_sd_expand_bfs/{sd_name}', 'wb') as handle: + pickle.dump(sd, handle) + print(f"Succession diagram size:", len(sd)) print(f"Minimal traps:", len(sd.minimal_trap_spaces())) diff --git a/benchmark/bench_sd_expand_dfs.py b/benchmark/bench_sd_expand_dfs.py index 74578cc..a8ca11b 100644 --- a/benchmark/bench_sd_expand_dfs.py +++ b/benchmark/bench_sd_expand_dfs.py @@ -1,10 +1,12 @@ from biodivine_aeon import BooleanNetwork -from biobalm.SuccessionDiagram import SuccessionDiagram +from biobalm import SuccessionDiagram import sys -import biobalm.SuccessionDiagram +import os +import pickle +import biobalm # Print progress and succession diagram size. 
-biobalm.SuccessionDiagram.DEBUG = True +biobalm.succession_diagram.DEBUG = True NODE_LIMIT = 1_000_000 DEPTH_LIMIT = 10_000 @@ -17,6 +19,12 @@ fully_expanded = sd.expand_dfs(dfs_stack_limit=DEPTH_LIMIT, size_limit=NODE_LIMIT) assert fully_expanded +model_name = os.path.basename(sys.argv[1]) +sd_name = os.path.splitext(model_name)[0] + '.pickle' +os.makedirs('./_sd_expand_dfs') +with open(f'./_sd_expand_dfs/{sd_name}', 'wb') as handle: + pickle.dump(sd, handle) + print(f"Succession diagram size:", len(sd)) print(f"Minimal traps:", len(sd.minimal_trap_spaces())) diff --git a/benchmark/bench_sd_expand_min.py b/benchmark/bench_sd_expand_min.py index daa6ef7..613996d 100644 --- a/benchmark/bench_sd_expand_min.py +++ b/benchmark/bench_sd_expand_min.py @@ -1,10 +1,12 @@ from biodivine_aeon import BooleanNetwork -from biobalm.SuccessionDiagram import SuccessionDiagram +from biobalm import SuccessionDiagram import sys -import biobalm.SuccessionDiagram +import os +import pickle +import biobalm # Print progress and succession diagram size. 
-biobalm.SuccessionDiagram.DEBUG = True +biobalm.succession_diagram.DEBUG = True NODE_LIMIT = 1_000_000 DEPTH_LIMIT = 10_000 @@ -16,8 +18,14 @@ sd = SuccessionDiagram(bn) sd.expand_minimal_spaces() +model_name = os.path.basename(sys.argv[1]) +sd_name = os.path.splitext(model_name)[0] + '.pickle' +os.makedirs('./_sd_expand_min') +with open(f'./_sd_expand_min/{sd_name}', 'wb') as handle: + pickle.dump(sd, handle) + print(f"Succession diagram size:", len(sd)) print(f"Minimal traps:", len(sd.minimal_trap_spaces())) print("size, expanded, minimal") -print(f"{len(sd)}, {len(list(sd.expanded_ids()))}, {len(sd.minimal_trap_spaces())}") +print(f"{len(sd)},{len(list(sd.expanded_ids()))},{len(sd.minimal_trap_spaces())}") diff --git a/benchmark/bench_sd_expand_scc.py b/benchmark/bench_sd_expand_scc.py index 7d69f9a..9adff8f 100644 --- a/benchmark/bench_sd_expand_scc.py +++ b/benchmark/bench_sd_expand_scc.py @@ -1,11 +1,12 @@ -import sys from biodivine_aeon import BooleanNetwork -from biobalm.SuccessionDiagram import SuccessionDiagram -from biobalm._sd_algorithms.expand_source_SCCs import expand_source_SCCs -import biobalm.SuccessionDiagram +from biobalm import SuccessionDiagram +import sys +import os +import pickle +import biobalm # Print progress and succession diagram size. 
-biobalm.SuccessionDiagram.DEBUG = True +biobalm.succession_diagram.DEBUG = True NODE_LIMIT = 1_000_000 DEPTH_LIMIT = 10_000 @@ -18,8 +19,14 @@ fully_expanded = expand_source_SCCs(sd, check_maa=False) assert fully_expanded +model_name = os.path.basename(sys.argv[1]) +sd_name = os.path.splitext(model_name)[0] + '.pickle' +os.makedirs('./_sd_expand_scc') +with open(f'./_sd_expand_scc/{sd_name}', 'wb') as handle: + pickle.dump(sd, handle) + print(f"Succession diagram size:", len(sd)) print(f"Minimal traps:", len(sd.minimal_trap_spaces())) print("size, expanded, minimal") -print(f"{len(sd)}, {len(list(sd.expanded_ids()))}, {len(sd.minimal_trap_spaces())}") +print(f"{len(sd)},{len(list(sd.expanded_ids()))},{len(sd.minimal_trap_spaces())}") From 3960bf6383bcfce68023edf02ebb176f3aff4646 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 21 Mar 2024 14:49:09 +0100 Subject: [PATCH 02/18] Do not create the output directory automatically. --- benchmark/bench_sd_expand_bfs.py | 1 - benchmark/bench_sd_expand_dfs.py | 1 - benchmark/bench_sd_expand_min.py | 1 - benchmark/bench_sd_expand_scc.py | 1 - 4 files changed, 4 deletions(-) diff --git a/benchmark/bench_sd_expand_bfs.py b/benchmark/bench_sd_expand_bfs.py index 0fc3d2a..3533e83 100644 --- a/benchmark/bench_sd_expand_bfs.py +++ b/benchmark/bench_sd_expand_bfs.py @@ -21,7 +21,6 @@ model_name = os.path.basename(sys.argv[1]) sd_name = os.path.splitext(model_name)[0] + '.pickle' -os.makedirs('./_sd_expand_bfs') with open(f'./_sd_expand_bfs/{sd_name}', 'wb') as handle: pickle.dump(sd, handle) diff --git a/benchmark/bench_sd_expand_dfs.py b/benchmark/bench_sd_expand_dfs.py index a8ca11b..f008c61 100644 --- a/benchmark/bench_sd_expand_dfs.py +++ b/benchmark/bench_sd_expand_dfs.py @@ -21,7 +21,6 @@ model_name = os.path.basename(sys.argv[1]) sd_name = os.path.splitext(model_name)[0] + '.pickle' -os.makedirs('./_sd_expand_dfs') with open(f'./_sd_expand_dfs/{sd_name}', 'wb') as handle: pickle.dump(sd, handle) diff --git 
a/benchmark/bench_sd_expand_min.py b/benchmark/bench_sd_expand_min.py index 613996d..cbdc32a 100644 --- a/benchmark/bench_sd_expand_min.py +++ b/benchmark/bench_sd_expand_min.py @@ -20,7 +20,6 @@ model_name = os.path.basename(sys.argv[1]) sd_name = os.path.splitext(model_name)[0] + '.pickle' -os.makedirs('./_sd_expand_min') with open(f'./_sd_expand_min/{sd_name}', 'wb') as handle: pickle.dump(sd, handle) diff --git a/benchmark/bench_sd_expand_scc.py b/benchmark/bench_sd_expand_scc.py index 9adff8f..110f715 100644 --- a/benchmark/bench_sd_expand_scc.py +++ b/benchmark/bench_sd_expand_scc.py @@ -21,7 +21,6 @@ model_name = os.path.basename(sys.argv[1]) sd_name = os.path.splitext(model_name)[0] + '.pickle' -os.makedirs('./_sd_expand_scc') with open(f'./_sd_expand_scc/{sd_name}', 'wb') as handle: pickle.dump(sd, handle) From 1d5c05e9e8181ef2becbc9a66226147cba9823bb Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 21 Mar 2024 14:52:35 +0100 Subject: [PATCH 03/18] Fix formatting. --- benchmark/bench_sd_expand_bfs.py | 4 ++-- benchmark/bench_sd_expand_dfs.py | 4 ++-- benchmark/bench_sd_expand_min.py | 4 ++-- benchmark/bench_sd_expand_scc.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmark/bench_sd_expand_bfs.py b/benchmark/bench_sd_expand_bfs.py index 3533e83..22d4bcc 100644 --- a/benchmark/bench_sd_expand_bfs.py +++ b/benchmark/bench_sd_expand_bfs.py @@ -20,8 +20,8 @@ assert fully_expanded model_name = os.path.basename(sys.argv[1]) -sd_name = os.path.splitext(model_name)[0] + '.pickle' -with open(f'./_sd_expand_bfs/{sd_name}', 'wb') as handle: +sd_name = os.path.splitext(model_name)[0] + ".pickle" +with open(f"./_sd_expand_bfs/{sd_name}", "wb") as handle: pickle.dump(sd, handle) print(f"Succession diagram size:", len(sd)) diff --git a/benchmark/bench_sd_expand_dfs.py b/benchmark/bench_sd_expand_dfs.py index f008c61..f2a5f6d 100644 --- a/benchmark/bench_sd_expand_dfs.py +++ b/benchmark/bench_sd_expand_dfs.py @@ -20,8 +20,8 @@ assert 
fully_expanded model_name = os.path.basename(sys.argv[1]) -sd_name = os.path.splitext(model_name)[0] + '.pickle' -with open(f'./_sd_expand_dfs/{sd_name}', 'wb') as handle: +sd_name = os.path.splitext(model_name)[0] + ".pickle" +with open(f"./_sd_expand_dfs/{sd_name}", "wb") as handle: pickle.dump(sd, handle) print(f"Succession diagram size:", len(sd)) diff --git a/benchmark/bench_sd_expand_min.py b/benchmark/bench_sd_expand_min.py index cbdc32a..dc12f48 100644 --- a/benchmark/bench_sd_expand_min.py +++ b/benchmark/bench_sd_expand_min.py @@ -19,8 +19,8 @@ sd.expand_minimal_spaces() model_name = os.path.basename(sys.argv[1]) -sd_name = os.path.splitext(model_name)[0] + '.pickle' -with open(f'./_sd_expand_min/{sd_name}', 'wb') as handle: +sd_name = os.path.splitext(model_name)[0] + ".pickle" +with open(f"./_sd_expand_min/{sd_name}", "wb") as handle: pickle.dump(sd, handle) print(f"Succession diagram size:", len(sd)) diff --git a/benchmark/bench_sd_expand_scc.py b/benchmark/bench_sd_expand_scc.py index 110f715..a56cdc5 100644 --- a/benchmark/bench_sd_expand_scc.py +++ b/benchmark/bench_sd_expand_scc.py @@ -20,8 +20,8 @@ assert fully_expanded model_name = os.path.basename(sys.argv[1]) -sd_name = os.path.splitext(model_name)[0] + '.pickle' -with open(f'./_sd_expand_scc/{sd_name}', 'wb') as handle: +sd_name = os.path.splitext(model_name)[0] + ".pickle" +with open(f"./_sd_expand_scc/{sd_name}", "wb") as handle: pickle.dump(sd, handle) print(f"Succession diagram size:", len(sd)) From f941482b56c391c6851fcee6d94cd6e79577efc1 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 11 Apr 2024 09:36:30 +0200 Subject: [PATCH 04/18] Refactoring of the algorithm detection methods. 
--- biobalm/_pint_reachability.py | 152 ++++ .../_sd_algorithms/compute_attractor_seeds.py | 88 --- .../_sd_algorithms/expand_attractor_seeds.py | 21 +- biobalm/_sd_algorithms/expand_source_SCCs.py | 4 +- biobalm/_sd_attractors/__init__.py | 1 + .../_sd_attractors/attractor_candidates.py | 703 ++++++++++++++++++ biobalm/_sd_attractors/attractor_symbolic.py | 231 ++++++ biobalm/motif_avoidant.py | 430 ----------- biobalm/petri_net_translation.py | 3 + biobalm/succession_diagram.py | 597 ++++++++++++--- biobalm/symbolic_utils.py | 32 +- biobalm/types.py | 88 ++- pyproject.toml | 13 +- requirements.txt | 7 +- ...ant_test.py => motif_avoidant_test.py.old} | 0 15 files changed, 1723 insertions(+), 647 deletions(-) create mode 100644 biobalm/_pint_reachability.py delete mode 100644 biobalm/_sd_algorithms/compute_attractor_seeds.py create mode 100644 biobalm/_sd_attractors/__init__.py create mode 100644 biobalm/_sd_attractors/attractor_candidates.py create mode 100644 biobalm/_sd_attractors/attractor_symbolic.py delete mode 100644 biobalm/motif_avoidant.py rename tests/{motif_avoidant_test.py => motif_avoidant_test.py.old} (100%) diff --git a/biobalm/_pint_reachability.py b/biobalm/_pint_reachability.py new file mode 100644 index 0000000..e526f87 --- /dev/null +++ b/biobalm/_pint_reachability.py @@ -0,0 +1,152 @@ +""" +A module which provides utility methods for reachability analysis using Pint. + +Note that in biobalm, we try to treat pint as an *optional* dependency. Hence, if you +want to use any of these methods, make sure to import the module in a way that fails +safely when Pint is not installed. 
+""" + +from __future__ import annotations + +from functools import reduce +from typing import TYPE_CHECKING + +from networkx import DiGraph # type: ignore +from pypint import Goal, InMemoryModel # type:ignore + +from biobalm.petri_net_translation import place_to_variable, _optimized_recursive_dnf_generator +from biobalm.types import BooleanSpace +import biobalm + +if TYPE_CHECKING: + from biodivine_aeon import Bdd + +GOAL_SIZE_LIMIT = 8192 +""" +Pint is called using command line and can only accept a limited number of arguments. +This limit is applied when constructing goals to avoid reaching this argument limit. + +The limit currently applies to the total number of literals that can be used to +represent a goal. + +The default value was empirically tested as safe on Debian linux, but other operating +systems may need a different limit to stay safe. Nevertheless, this should not be +an issue on smaller/simpler networks. +""" + + +def _pint_reachability( + petri_net: DiGraph, + initial_state: BooleanSpace, + target_states: Bdd, +) -> bool: + """ + Use Pint to check if a given `initial_state` can possibly reach some state + in the `target_states` BDD. + + If the reachability analysis is inconclusive, the method + raises a `RuntimeError`. + """ + if target_states.is_false(): + return False # Cannot reach a stat in an empty set. + + # Build a Pint model through an automata network and copy + # over the initial condition. + pint_model = InMemoryModel(_petri_net_as_automata_network(petri_net)) + for var, level in initial_state.items(): + pint_model.initial_state[var] = level + + goal = _pint_build_symbolic_goal(target_states) + + def failed(_model, _goal): + raise RuntimeError("Cannot verify.") + + return pint_model.reachability(goal=goal, fallback=failed) # type: ignore + + +def _pint_build_symbolic_goal(states: Bdd) -> Goal: + """ + A helper method which (very explicitly) converts a set of states + represented through a BDD into a Pint `Goal`. 
+ + Note that if `GOAL_SIZE_LIMIT` is exceeded, a partial goal is returned + that may not cover all the states in the argument `Bdd`. This avoids + exceeding the argument list size limit, but introduces additional source + of incompleteness into the reachability process. + """ + assert not states.is_false() + + goals: list[Goal] = [] + limit = GOAL_SIZE_LIMIT + for clause in _optimized_recursive_dnf_generator(states): + named_clause = { + states.__ctx__().get_variable_name(var): int(value) + for var, value in clause.items() + } + + limit -= len(named_clause) + if limit < 0: + # If the goal is too large to be passed as a command line argument, + # break here and don't continue. This is not ideal but I'm not sure + # how to fix this other than modifying `pint` itself. + if biobalm.succession_diagram.DEBUG: + print("WARNING: `pint` goal size limit exceeded. A partial goal is used.") + break + + goal_atoms = [f"{var}={level}" for var, level in named_clause.items()] + goals.append(Goal(",".join(goal_atoms))) + + return reduce(lambda a, b: a | b, goals) + + +def _petri_net_as_automata_network(petri_net: DiGraph) -> str: + """ + Takes a Petri net which was created by implicant encoding from a Boolean network, + and builds an automata network file (`.an`) compatible with the Pint tool. + """ + automata_network = "" + + # Go through all PN places and save them as model variables. + variable_set: set[str] = set() + for place, kind in petri_net.nodes(data="kind"): # type: ignore + if kind != "place": + continue + variable_set.add(place_to_variable(place)[0]) # type: ignore[reportUnknownArgumentType] # noqa + variables = sorted(variable_set) + + # Declare all variables with 0/1 domains. 
+ for var in variables: + automata_network += f'"{var}" [0, 1]\n' + + for transition, kind in petri_net.nodes(data="kind"): # type: ignore + if kind != "transition": + continue + + predecessors = set(petri_net.predecessors(transition)) # type: ignore + successors = set(petri_net.successors(transition)) # type: ignore + + # The value under modification is the only + # value that is different between successors and predecessors. + source_place = next(iter(predecessors - successors)) # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa + target_place = next(iter(successors - predecessors)) # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa + + (s_var, s_level) = place_to_variable(source_place) # type: ignore[reportUnknownArgumentType] # noqa + (t_var, t_level) = place_to_variable(target_place) # type: ignore[reportUnknownArgumentType] # noqa + assert s_var == t_var + + # The remaining places represent the necessary conditions. + # Here, we transform them into a text format. + conditions = sorted(predecessors.intersection(successors)) # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa + conditions = [place_to_variable(p) for p in conditions] # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa + conditions = [f'"{var}"={int(level)}' for var, level in conditions] + + # A pint rule consists of a variable name, value transition, + # and a list of necessary conditions for the transition (if any). 
+ if len(conditions) == 0: + rule = f'"{s_var}" {int(s_level)} -> {int(t_level)}\n' + else: + rule = f"\"{s_var}\" {int(s_level)} -> {int(t_level)} when {' and '.join(conditions)}\n" + + automata_network += rule + + return automata_network diff --git a/biobalm/_sd_algorithms/compute_attractor_seeds.py b/biobalm/_sd_algorithms/compute_attractor_seeds.py deleted file mode 100644 index 4853bb8..0000000 --- a/biobalm/_sd_algorithms/compute_attractor_seeds.py +++ /dev/null @@ -1,88 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from biobalm.symbolic_utils import state_list_to_bdd - -if TYPE_CHECKING: - from biobalm.succession_diagram import SuccessionDiagram - -import biobalm -import biobalm.succession_diagram -from biobalm.motif_avoidant import detect_motif_avoidant_attractors, make_retained_set -from biobalm.trappist_core import compute_fixed_point_reduced_STG -from biobalm.types import BooleanSpace - - -def compute_attractor_seeds( - sd: SuccessionDiagram, - node_id: int, -) -> list[BooleanSpace]: - """ - Compute the list of vertices such that each attractor within the subspace of - the given `node_id` is covered by exactly one vertex. - - If the node is a stub, the result covers the whole subspace. If the node is - expanded, the result only covers the "immediate" subspace without the - subspaces of the child nodes. - """ - - if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] Start computing attractor seeds.") - - node_space = sd.node_data(node_id)["space"] - - if len(node_space) == sd.network.variable_count(): - # This node is a fixed-point. - return [node_space] - - # Compute the list of child spaces if the node is expanded. Otherwise - # "pretend" that there are no children. - child_spaces = [] - if sd.node_data(node_id)["expanded"]: - child_spaces = [sd.node_data(s)["space"] for s in sd.node_successors(node_id)] - - # Fix everything in the NFVS to zero, as long as - # it isn't already fixed by our `node_space`. 
- # - # We add the whole node space to the retain set because we know - # the space is a trap and this will remove the corresponding unnecessary - # Petri net transitions. - retained_set = make_retained_set( - sd.symbolic, sd.node_nfvs(node_id), node_space, child_spaces - ) - - if len(retained_set) == sd.network.variable_count() and len(child_spaces) == 0: - # There is only a single attractor remaining here, - # and its "seed" is the retained set. - return [retained_set] - - terminal_restriction_space = state_list_to_bdd( - sd.symbolic.symbolic_context(), child_spaces - ).l_not() - - candidate_seeds = compute_fixed_point_reduced_STG( - sd.petri_net, - retained_set, - ensure_subspace=node_space, - avoid_subspaces=child_spaces, - ) - - if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] Found {len(candidate_seeds)} seed candidates.") - - if len(candidate_seeds) == 1 and len(child_spaces) == 0: - # If this is a (non-strict) minimal trap and there is only one seed, - # the seed must be valid. 
- return candidate_seeds - else: - attractors = detect_motif_avoidant_attractors( - sd.symbolic, - sd.petri_net, - candidate_seeds, - terminal_restriction_space, - max_iterations=1000, - is_in_an_mts=len(child_spaces) == 0, - ) - - return attractors diff --git a/biobalm/_sd_algorithms/expand_attractor_seeds.py b/biobalm/_sd_algorithms/expand_attractor_seeds.py index 25440f6..c72a4b7 100644 --- a/biobalm/_sd_algorithms/expand_attractor_seeds.py +++ b/biobalm/_sd_algorithms/expand_attractor_seeds.py @@ -7,9 +7,10 @@ import biobalm import biobalm.succession_diagram -from biobalm.motif_avoidant import make_retained_set from biobalm.space_utils import intersect from biobalm.trappist_core import compute_fixed_point_reduced_STG +from biobalm._sd_attractors.attractor_candidates import make_heuristic_retained_set +from biodivine_aeon import AsynchronousGraph def expand_attractor_seeds(sd: SuccessionDiagram, size_limit: int | None = None): @@ -69,20 +70,26 @@ def expand_attractor_seeds(sd: SuccessionDiagram, size_limit: int | None = None) # Now, we need to asses if the next successor has some candidate states which # are not covered by the already expanded children. 
- successor_space = sd.node_data(successors[-1])["space"] - retained_set = make_retained_set( - sd.symbolic, sd.node_nfvs(node), successor_space - ) + s = successors[-1] + + successor_space = sd.node_data(s)["space"] + successor_bn = sd.node_percolated_network(s, compute=True) + successor_nfvs = sd.node_percolated_nfvs(s, compute=True) + successor_pn = sd.node_percolated_petri_net(s, compute=True) + successor_graph = AsynchronousGraph(successor_bn) avoid_or_none = [ intersect(successor_space, child) for child in expanded_motifs ] avoid = [x for x in avoid_or_none if x is not None] + retained_set = make_heuristic_retained_set( + successor_graph, successor_nfvs, avoid + ) + successor_seeds = compute_fixed_point_reduced_STG( - sd.petri_net, + successor_pn, retained_set, - ensure_subspace=successor_space, avoid_subspaces=avoid, solution_limit=1, ) diff --git a/biobalm/_sd_algorithms/expand_source_SCCs.py b/biobalm/_sd_algorithms/expand_source_SCCs.py index f9025a6..6214581 100644 --- a/biobalm/_sd_algorithms/expand_source_SCCs.py +++ b/biobalm/_sd_algorithms/expand_source_SCCs.py @@ -77,7 +77,7 @@ def expand_source_SCCs( next_level.append(sd._ensure_node(root, sub_space)) # type: ignore sd.node_data(root)["expanded"] = True - sd.node_data(root)["attractors"] = [] # no need to look for attractors here + sd.node_data(root)["attractor_seeds"] = [] # no need to look for attractors here current_level = next_level next_level = [] @@ -132,7 +132,7 @@ def expand_source_SCCs( for node_id in final_level: # These assertions should be unnecessary, but just to be sure. 
assert not sd.node_data(node_id)["expanded"] # expand nodes from here - assert sd.node_data(node_id)["attractors"] is None # check attractors from here + assert sd.node_data(node_id)["attractor_seeds"] is None # check attractors from here # restore this once we allow all expansion algorithms to expand from a node # expander(sd, node_id) diff --git a/biobalm/_sd_attractors/__init__.py b/biobalm/_sd_attractors/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/biobalm/_sd_attractors/__init__.py @@ -0,0 +1 @@ + diff --git a/biobalm/_sd_attractors/attractor_candidates.py b/biobalm/_sd_attractors/attractor_candidates.py new file mode 100644 index 0000000..2eb4f40 --- /dev/null +++ b/biobalm/_sd_attractors/attractor_candidates.py @@ -0,0 +1,703 @@ +""" +Provides functions that are used to identify and reduce the set of attractor candidate states. + +Note that most of these functions assume they are operating on the network or petri net +percolated to the subspace of the relevant node. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal, cast + +if TYPE_CHECKING: + from biobalm.succession_diagram import SuccessionDiagram + from biobalm.types import BooleanSpace + from networkx import DiGraph # type: ignore + +import random +import biobalm +from biodivine_aeon import Bdd, AsynchronousGraph +from biobalm.trappist_core import compute_fixed_point_reduced_STG +from biobalm.symbolic_utils import state_list_to_bdd, valuation_to_state, state_to_bdd + +try: + PINT_AVAILABLE = True + import biobalm._pint_reachability as pint +except ModuleNotFoundError: + PINT_AVAILABLE = False + + +CANDIDATES_HARD_LIMIT = 100_000 +""" +If there are more than this amount of attractor candidates, then +attractor detection will fail with a runtime error. This is mainly to +avoid out-of-memory errors or crashing clingo. 
+""" + +RETAINED_SET_OPTIMIZATION_THRESHOLD = 100 +""" +If there are more than this amount of attractor candidates, we will +try to optimize the retained set using ASP (if enabled). +""" + + +def compute_attractor_candidates( + sd: SuccessionDiagram, + node_id: int, + greedy_asp_minification: bool, + simulation_minification: bool, + pint_minification: bool, +) -> list[BooleanSpace]: + """ + Compute an optimized list of candidate states that is guaranteed + to cover every attractor in the specified SD node. + + It should hold that for every attractor in the node's sub-space outside + of the child sub-spaces (if known), the list contains at least one state + from this attractor. However, it is not guaranteed that each candidate + covers some attractor, so a non-empty list of candidates can + still correspond to an empty list of attractors. + + The method starts by computing an NFVS of the network percolated to the + node's space, and then assigns each NFVS node a value towards which + it tends to. This transformation satisfies that the fixed-points in the + resulting network cover the attractors in the original sub-space. + + Subsequently, the method can further optimize this set of candidates. + + ### Greedy ASP minification + + Depending on which values are chosen for the NFVS nodes, a different set of + candidates is obtained. This optimization method flips the initial NFVS node + values and retains modifications that lead to a smaller candidate set, until + a local minimum is achieved. + + ### Simulation minification + + It is often possible to reduce the candidate set using a pseudo-random walk + which eventually leads to one of the child spaces, or to one of the existing + candidate states. + + ### Pint minification + + The tool `pint` can perform (incomplete) reachability check for any network + state, hence we can also use it to reduce the list of candidate states. + This option is only available if pint is actually installed. 
+ + Parameters + ---------- + sd : SuccessionDiagram + The succession diagram in question. + node_id : int + An ID of an SD node for which we are computing the candidates. + greedy_asp_minification: bool + Whether to enable the iterative ASP-based minification. + simulation_minification: bool + Whether to enable simulation minification. + pint_minification: bool + Whether to enable pint minification. + + Returns + ------- + list[BooleanSpace] + The list of attractor candidate states. + """ + + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Start computing attractor candidates.") + + node_data = sd.node_data(node_id) + + node_space = node_data["space"] + + if len(node_space) == sd.network.variable_count(): + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Attractor candidates done: node is a fixed-point.") + return [node_space] + + node_nfvs = sd.node_percolated_nfvs(node_id, compute=True) + + if biobalm.succession_diagram.DEBUG: + root_nfvs = sd.node_percolated_nfvs(sd.root(), compute=True) + print(f"[{node_id}] > Percolated node NFVS contains {len(node_nfvs)} nodes (vs {len(root_nfvs)} in the root).") + + # Compute the list of child stable-motifs if the node is expanded. Otherwise + # "pretend" that there are no children. + # + # (We can safely ignore anything that is in these stable motifs, not just the + # percolated child spaces, because it is either in the child space, or it percolates + # to the child space, so it does not contain attractors) + child_motifs_reduced = [] + if node_data["expanded"]: + children = sd.node_successors(node_id, compute=False) + child_motifs_reduced = [sd.edge_stable_motif(node_id, s, reduced=True) for s in children] + + # Indicates that this space is either minimal, or has no computed successors. + # In either case, the space must contain at least one attractor. 
+ node_is_pseudo_minimal = len(child_motifs_reduced) == 0 + + if biobalm.succession_diagram.DEBUG and node_is_pseudo_minimal: + print(f"[{node_id}] > The node has no children (i.e. it is minimal or unexpanded).") + + if len(node_nfvs) == 0: + # If the NFVS is empty, it means this node space has no complex attractors. + # In a fully expanded SD, all fixed-points should be handled by the condition above, + # so only non-minimal spaces remain and these can be resolved as empty. + # + # However, if SD is not fully expanded, it can contain further "pseudo-minimal" + # spaces that would contain minimal traps if expanded. For those, there should + # only be fixed-point attractors, but we need to identify + # them, so we need to continue. + + # This tests the assumption that all fully expanded minimal + # traps are handled before, and the only trap spaces with + # an empty NFVS are unexpanded pseudo-minimal, + # or expanded non-minimal. + assert not sd.node_is_minimal(node_id) + + if not node_is_pseudo_minimal: + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Attractor candidates done: empty NFVS in a non-minimal space.") + return [] + + # First, we create a retained set and corresponding candidate set that is valid in the + # percolated Boolean network. This process is *usually* quite fast, but + # sometimes the candidate set explodes and is very large (even too large to enumerate). + # In that case, we can try to simplify it greedily using the ASP solver (assuming this + # is enabled). + # + # Overall, our strategy is the following: + # 1. Compute a heuristic retained set based on the original NFVS method. + # 2. If the ASP optimization is disabled, we generate the candidates from this retained + # set in full. + # 3. If ASP optimization is enabled, we first test if there are at least 100 candidates + # for the initial retained set. + # 3a. 
If there are fewer than 100 candidates, we just run one optimization pass on + # the initial retained set and report the result. + # 3b. If there are more than 100 candidates, we probably need to optimize the retained + # set even further, in which case we start to recursively generate a new retained + # set that is optimized in each step to hopefully get a better result. + + pn_reduced = sd.node_percolated_petri_net(node_id, compute=True) + bn_reduced = sd.node_percolated_network(node_id, compute=True) + graph_reduced = AsynchronousGraph(bn_reduced) + + retained_set = make_heuristic_retained_set(graph_reduced, node_nfvs, child_motifs_reduced) + + if len(retained_set) == sd.network.variable_count() and node_is_pseudo_minimal: + # If the retained set describes a fixed point, then only one attractor + # is present in this space and it must contain the state described by the retained set. + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Singular attractor found through fixed-point retained set. Done.") + return [retained_set | node_space] + + if not greedy_asp_minification: + candidate_states = compute_fixed_point_reduced_STG( + pn_reduced, + retained_set, + avoid_subspaces=child_motifs_reduced, + solution_limit=CANDIDATES_HARD_LIMIT + ) + if len(candidate_states) == CANDIDATES_HARD_LIMIT: + raise RuntimeError(f"Exceeded the maximum amount of attractor candidates (CANDIDATES_HARD_LIMIT={CANDIDATES_HARD_LIMIT}).") + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Computed {len(candidate_states)} candidate states without retained set optimization.") + else: + candidate_states = compute_fixed_point_reduced_STG( + pn_reduced, + retained_set, + avoid_subspaces=child_motifs_reduced, + solution_limit=RETAINED_SET_OPTIMIZATION_THRESHOLD + ) + + if len(candidate_states) < RETAINED_SET_OPTIMIZATION_THRESHOLD: + # The candidate set is small. No need to overthink it. The original + # retained set is probably quite good. 
However, we can still try to + # optimize it further if it makes sense. + if len(candidate_states) > 1 or (not node_is_pseudo_minimal and len(candidate_states) > 0): + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Initial retained set generated {len(candidate_states)} candidates. Optimizing...") + optimized = asp_greedy_retained_set_optimization( + node_id, + petri_net=pn_reduced, + retained_set=retained_set, + candidate_states=candidate_states, + avoid_dnf=child_motifs_reduced, + ) + retained_set = optimized[0] + candidate_states = optimized[1] + else: + # There seem to be many candidates, in which case it might be better + # to optimize the retained set dynamically. + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Initial retained set generated >{RETAINED_SET_OPTIMIZATION_THRESHOLD} candidates. Regenerate retained set.") + retained_set = {} + candidate_states = [] + for var in node_nfvs: + retained_set[var] = 0 + candidate_states_zero = compute_fixed_point_reduced_STG( + pn_reduced, + retained_set, + avoid_subspaces=child_motifs_reduced, + solution_limit=CANDIDATES_HARD_LIMIT + ) + + if len(candidate_states_zero) <= len(candidate_states): + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Chosen {var}=0 without increasing candidate count ({len(candidate_states_zero)}). 
{len(retained_set)}/{len(node_nfvs)} variables chosen.") + candidate_states = candidate_states_zero + continue + + retained_set[var] = 1 + candidate_states_one = compute_fixed_point_reduced_STG( + pn_reduced, + retained_set, + avoid_subspaces=child_motifs_reduced, + solution_limit=len(candidate_states_zero) + ) + + if len(candidate_states_zero) == CANDIDATES_HARD_LIMIT and len(candidate_states_one) == CANDIDATES_HARD_LIMIT: + raise RuntimeError(f"Exceeded the maximum amount of attractor candidates (CANDIDATES_HARD_LIMIT={CANDIDATES_HARD_LIMIT}).") + + if len(candidate_states_one) <= len(candidate_states): + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Chosen {var}=1 without increasing candidate count ({len(candidate_states_one)}). {len(retained_set)}/{len(node_nfvs)} variables chosen.") + candidate_states = candidate_states_one + continue + + if len(candidate_states_zero) < len(candidate_states_one): + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Chosen {var}=0 with better candidate count ({len(candidate_states_zero)}). {len(retained_set)}/{len(node_nfvs)} variables chosen.") + candidate_states = candidate_states_zero + retained_set[var] = 0 + else: + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Chosen {var}=1 with better candidate count ({len(candidate_states_one)}). {len(retained_set)}/{len(node_nfvs)} variables chosen.") + candidate_states = candidate_states_one + retained_set[var] = 1 + + # At this point, we know the candidate count increased and so we should + # try to bring it back down. + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Optimizing partial retained set...") + optimized = asp_greedy_retained_set_optimization( + node_id, + petri_net=pn_reduced, + retained_set=retained_set, + candidate_states=candidate_states, + avoid_dnf=child_motifs_reduced, + ) + retained_set = optimized[0] + candidate_states = optimized[1] + + # Terminate if done. 
+ if len(candidate_states) == 0: + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Initial candidate set empty. Done.") + return [] + if node_is_pseudo_minimal and len(candidate_states) == 1: + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Single candidate found in (pseudo) minimal trap space. Done.") + return [candidate_states[0] | node_space] + + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Attractor candidates from retained set: {len(candidate_states)}.") + + if simulation_minification: + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Start simulation minification...") + avoid_children_symbolic = state_list_to_bdd( + graph_reduced.symbolic_context(), child_motifs_reduced + ) + + # Here, we gradually increase the iteration count while + # the candidate set is being actively reduced. If the simulation + # cannot reduce any further states, we are done. + iterations = 1024 + while len(candidate_states) > 0: + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Start simulation with {len(candidate_states)} states and simulation limit {iterations}.") + reduced = run_simulation_minification( + node_id, + graph_reduced, + candidate_states, + avoid_children_symbolic, + max_iterations=iterations, + simulation_seed=123, + ) + + if len(reduced) == len(candidate_states): + candidate_states = reduced + break + + iterations = 2 * iterations + candidate_states = reduced + + if len(candidate_states) == 1 and avoid_children_symbolic.is_false(): + break + + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Candidates after simulation: {len(candidate_states)}") + + # Terminate if done. + if len(candidate_states) == 0: + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Candidate set empty. Done.") + return [] + if node_is_pseudo_minimal and len(candidate_states) == 1: + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Single candidate found in (pseudo) minimal trap space. 
Done.") + return [candidate_states[0] | node_space] + + if pint_minification and not PINT_AVAILABLE: + print("WARNING: Using `pint`, but `pint` is not installed. Skipping.") + elif pint_minification and PINT_AVAILABLE: + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] Start `pint` minification...") + + children_bdd = state_list_to_bdd(graph_reduced.symbolic_context(), child_motifs_reduced) + candidates_bdd = state_list_to_bdd(graph_reduced.symbolic_context(), candidate_states) + avoid_bdd = children_bdd.l_or(candidates_bdd) + + filtered_states = [] + for (i, state) in enumerate(candidate_states): + state_bdd = state_to_bdd(graph_reduced.symbolic_context(), state) + + avoid_bdd = avoid_bdd.l_and_not(state_bdd) + + keep = True + try: + if pint._pint_reachability(pn_reduced, state, avoid_bdd): + keep = False + except RuntimeError as e: + assert str(e) == "Cannot verify." + + if keep: + avoid_bdd = avoid_bdd.l_or(state_bdd) + filtered_states.append(state) + + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > `pint` {i+1}/{len(candidate_states)}: eliminated: {not keep}, retained: {len(filtered_states)}.") + + candidate_states = filtered_states + + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Candidates after `pint`: {len(candidate_states)}") + + # Finally, we need to augment each candidate state with the + # fixed values from the node space, because until now, we only considered + # the reduced/percolated network. + candidate_states = [x | node_space for x in candidate_states] + + return candidate_states + + +def run_simulation_minification( + node_id: int, + graph: AsynchronousGraph, + candidate_states: list[BooleanSpace], + avoid_bdd: Bdd, + max_iterations: int, + simulation_seed: int, +) -> list[BooleanSpace]: + """ + A fast but incomplete method for eliminating spurious attractor candidates + based on stochastic simulation. + + Parameters + ---------- + node_id : int + The ID of the associated SD node. 
This is only for logging progress. + graph : biodivine_aeon.AsynchronousGraph + The symbolic encoding of the *percolated* network dynamics. + candidate_states: list[BooleanSpace] + The list of currently considered candidates that is to be reduced. + avoid_bdd: biodivine_aeon.Bdd + A symbolic representation of the states/spaces that are to be ignored. + If any of these states is reachable by a candidate, that candidate is + safe to ignore as well. + max_iterations: int + The number of steps performed by the simulator. + simulation_seed: int + The seed value for the random walk simulator. + + Returns + ------- + list[BooleanSpace] + The optimized list of candidate states. + """ + + # A random generator initialized with a fixed seed. Ensures simulation + # is randomized but deterministic. + generator = random.Random(simulation_seed) + + # Retrieve the symbolic variables that encode each network variable and + # associate them with the corresponding network's update functions. + symbolic_ctx = graph.symbolic_context() + network_vars = graph.network_variables() + symbolic_vars = [] + for var in network_vars: + s_var = symbolic_ctx.find_network_bdd_variable(var) + assert s_var is not None + symbolic_vars.append(s_var) + update_functions = {symbolic_vars[i]: graph.mk_update_function(network_vars[i]) for i in range(len(network_vars))} + + # Use stochastic simulation to prune the set of candidate states. + # We use different simulation approach depending on whether this space + # is a minimal trap or not. + if not avoid_bdd.is_false(): + + candidates_bdd = state_list_to_bdd(symbolic_ctx, candidate_states) + filtered_candidates: list[BooleanSpace] = [] + + for (i, state) in enumerate(candidate_states): + + if i % 100 == 99 and biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Simulation progress: {i+1}/{len(candidate_states)}") + + # Remove the state from the candidates. If we can prove that is + # is not an attractor, we will put it back. 
+ state_bdd = graph.mk_subspace(state).to_bdd() + candidates_bdd = candidates_bdd.l_and_not(state_bdd) + + # Convert the state BDD to a valuation object. This is like + # a dictionary, but updates much faster because it is managed + # by rust and indexed by symbolic variable IDs and not names. + # + # (The `state_bdd` should be a singleton, so it's ok to just + # take the first valuation) + simulation = state_bdd.valuation_first() + + is_valid_candidate = True + for _ in range(max_iterations): + # Advance all variables by one step in random order. + generator.shuffle(symbolic_vars) + for s_var in symbolic_vars: + # We can just "call" the BDD as a normal function with + # a valuation as the input. + step = update_functions[s_var](simulation) + simulation[s_var] = step + + # To check for set inclusion, we can also just "eval" the + # BDD, since it evaluates to true for every state that is + # in the encoded set. + + if candidates_bdd(simulation): + # The state can reach some other state in the candidate + # set. This does not mean it cannot be an attractor, but + # it means it is sufficient to keep considering + # the remaining candidates. + is_valid_candidate = False + break + + if avoid_bdd(simulation): + # The state can reach some other state in the avoid set, + # which means it cannot be an attractor in this subspace. + is_valid_candidate = False + break + + if is_valid_candidate: + # If we cannot rule out the candidate, we have to put it back + # into the candidate set. + + # However, we want to use the new state (after simulation), + # because this one is probably "closer" to the goals that we + # want to eventually get to. 
+ + candidates_bdd = candidates_bdd.l_or(Bdd(simulation)) + filtered_candidates.append(valuation_to_state(symbolic_ctx, simulation)) + + return filtered_candidates + else: + # If the avoid set is empty, it means that this is a pseudo-minimal space + # and hence we (a) don't have to check if we reached the avoid set, + # and (b) we can stop with one candidate instead of zero. + + candidates_bdd = state_list_to_bdd(symbolic_ctx, candidate_states) + printed = set() + for i in range(max_iterations): + + progress = int((i * len(candidate_states)) / max_iterations) + if progress % 100 == 99 and biobalm.succession_diagram.DEBUG and (progress not in printed): + printed.add(progress) + print(f"[{node_id}] > Simulation progress: {progress + 1}/{len(candidate_states)}") + + generator.shuffle(symbolic_vars) + new_candidates_bdd = symbolic_ctx.mk_constant(False) + + # Goes through all the states in the candidates BDD, updates + # them, and puts them in the new BDD assuming they are not there yet. + for state_val in candidates_bdd.valuation_iterator(): + candidates_bdd = candidates_bdd.l_and_not(Bdd(state_val)) + + simulation = state_val + for s_var in symbolic_vars: + step = update_functions[s_var](simulation) + simulation[s_var] = step + + if candidates_bdd(simulation) or new_candidates_bdd(simulation): + # We have reached one of the other candidates. + continue + + new_candidates_bdd = new_candidates_bdd.l_or(Bdd(simulation)) + + candidates_bdd = new_candidates_bdd + + if candidates_bdd.cardinality() <= 1: + break + + # Finally, we convert the candidate states back into something that + # biobalm understands. 
+ filtered_candidates = [] + for state_val in candidates_bdd.valuation_iterator(): + filtered_candidates.append(valuation_to_state(symbolic_ctx, state_val)) + + return filtered_candidates + + +def asp_greedy_retained_set_optimization( + node_id: int, + petri_net: DiGraph, + retained_set: BooleanSpace, + candidate_states: list[BooleanSpace], + avoid_dnf: list[BooleanSpace], +) -> tuple[BooleanSpace, list[BooleanSpace]]: + """ + Takes a Boolean network encoded as a Petri net and a candidate retained set. + Then, performs a greedy optimization process leading to a locally optimal + version of the original retained set that results in the least + amount of candidate states. + + + Parameters + ---------- + node_id : int + The ID of the associated SD node. This is only for logging progress. + petri_net : networkx.DiGraph + The Petri net encoding of the *percolated* network dynamics. + retained_set : BooleanSpace + The retained set that will be the initial point of the optimization. + candidate_states: list[BooleanSpace] + The list of candidates that are valid for the given `retained_set`, so + that they don't need to be recomputed. + avoid_dnf: list[BooleanSpace] + The list of subspaces in the given network in which candidate + states can be ignored. + + Returns + ------- + tuple[BooleanSpace, list[BooleanSpace]] + The optimized reatined set, together with the list of candidates that + are valid for this retained set. + """ + done = False + while not done: + done = True + for var in retained_set: + # Standrad termination checks. + if len(candidate_states) == 0: + return (retained_set, []) + if len(avoid_dnf) == 0 and len(candidate_states) == 1: + # If we are not avoiding anything, this is a (pseudo) + # minimal space and a single candidate is good enough. + return (retained_set, candidate_states) + + # Flip the value of the specified variable to see if the resulting + # candidate set is smaller. 
+ retained_set_2 = retained_set.copy() + retained_set_2[var] = cast(Literal[0, 1], 1 - retained_set_2[var]) + candidate_states_2 = compute_fixed_point_reduced_STG( + petri_net, + retained_set_2, + avoid_subspaces=avoid_dnf, + # We don't need all solutions if the result isn't smaller. + solution_limit=len(candidate_states) + ) + if len(candidate_states_2) < len(candidate_states): + retained_set = retained_set_2 + candidate_states = candidate_states_2 + done = False + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Candidate states optimized to {len(candidate_states)}.") + return (retained_set, candidate_states) + + +def make_heuristic_retained_set( + graph: AsynchronousGraph, + nfvs: list[str], + avoid_dnf: list[BooleanSpace] +) -> BooleanSpace: + """ + Calculate the retained set for a given `space` based on heuristic criteria. + + The retained set is technically a space-like object that describes the + variables which have to be fixed in order for the network to lose all + complex attractors. However, note that this really means changing the update + functions of these variables. This is not a trap space that only contains + fixed-points, but a description of how the network must be modified to + eliminate all complex attractors. + + The construction guarantees that any complex attractor of the old + network will manifest as at least one fixed-point in the new network. But this + network modification is not implemented here. This method only generates + the necessary list of variables and values. + + Finally, note that the method uses a heuristic to select values + that should ideally lead to the least amount of fixed-points in + the modified network, but there can be other, more optimal retained sets. + + Parameters + ---------- + graph : AsynchronousGraph + The symbolic update functions stored as an `AsynchronousGraph` object + from the `biodivine_aeon` library. 
+ nfvs : list[str] + The list of variables that represent an NFVS of the network encoded + by `graph`. + avoid_dnf : list[BooleanSpace] + A list of :class:`BooleanSpace` objects + describing the child spaces of the specified network. Only attractors that + are not in these child spaces are considered. If no child spaces are + provided, then all attractors are considered. + + Returns + ------- + BooleanSpace + A :class:`BooleanSpace` object describing the + retained set. + """ + retained_set = {} + + # First, if there are any child spaces present, we extend the retained set + # with the values from the one that has the least amount of fixed variables + # shared with the NFVS. + if len(avoid_dnf) > 0: + # Find the child space that has the fewest nodes in common with the NFVS: + least_common_child_space = avoid_dnf[0] + least_common_nodes = len(set(least_common_child_space) & set(nfvs)) + for child_space in avoid_dnf: + common_nodes = len(set(child_space) & set(nfvs)) + if common_nodes < least_common_nodes: + least_common_nodes = common_nodes + least_common_child_space = child_space + + for x in least_common_child_space: + if (x not in retained_set) and (x in nfvs): + retained_set[x] = least_common_child_space[x] + + # Then, set the remaining NFVS variables based on the majority output value + # in the update function of the relevant variable. + for x in nfvs: + if x in retained_set: + continue + + fn_bdd = graph.mk_update_function(x) + + # If most of the function is positive, we set the value + # to `1`, otherwise set it to `0`. 
+ if fn_bdd.cardinality() > fn_bdd.l_not().cardinality(): + retained_set[x] = 1 + else: + retained_set[x] = 0 + + return retained_set diff --git a/biobalm/_sd_attractors/attractor_symbolic.py b/biobalm/_sd_attractors/attractor_symbolic.py new file mode 100644 index 0000000..cf38f4f --- /dev/null +++ b/biobalm/_sd_attractors/attractor_symbolic.py @@ -0,0 +1,231 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, cast, Literal + +if TYPE_CHECKING: + from biobalm.succession_diagram import SuccessionDiagram + from biodivine_aeon import VariableId, VertexSet + +import biobalm +from biodivine_aeon import AsynchronousGraph, ColoredVertexSet +from biobalm.symbolic_utils import state_list_to_bdd +from biobalm.types import BooleanSpace + + +def compute_attractors_symbolic( + sd: SuccessionDiagram, + node_id: int, + candidate_states: list[BooleanSpace], + seeds_only: bool = False +) -> tuple[list[BooleanSpace], list[VertexSet] | None]: + """ + Uses exhaustive symbolic reachability to eliminate spurious candidate states + and to compute the exact attractor sets. + + Parameters + ---------- + sd : SuccessionDiagram + The succession diagram in question. + node_id : int + An ID of an SD node for which we are computing the candidates. + candidate_states: bool + The list of candidate states covering the attractors in the + given node. + seeds_only: bool + If `True`, the method can terminate early once it is guaranteed that + all seeds have been identified. In such case, the list of returned sets + is `None`. + + Returns + ------- + tuple[list[BooleanSpace], list[VertexSet] | None] + The list of attractor seed states and corresponding attractor sets (if computed). 
+ """ + + node_data = sd.node_data(node_id) + node_space = node_data["space"] + bn_reduced = sd.node_percolated_network(node_id, compute=True) + graph_reduced = AsynchronousGraph(bn_reduced) + symbolic_ctx = graph_reduced.symbolic_context() + + # Ignore variables that are already fixed in the nodes space. + # These are not used in the reduced network. + candidate_states_reduced = [] + for candidate in candidate_states: + candidate = {k: v for (k, v) in candidate.items() if k not in node_space} + candidate_states_reduced.append(candidate) + + child_motifs_reduced = [] + if node_data["expanded"]: + children = sd.node_successors(node_id, compute=False) + child_motifs_reduced = [sd.edge_stable_motif(node_id, s, reduced=True) for s in children] + + child_motifs_bdd = state_list_to_bdd(symbolic_ctx, child_motifs_reduced) + candidate_bdd = state_list_to_bdd(symbolic_ctx, candidate_states_reduced) + + children_set = ColoredVertexSet(symbolic_ctx, child_motifs_bdd) + candidate_set = ColoredVertexSet(symbolic_ctx, candidate_bdd) + + avoid = candidate_set.union(children_set) + + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Start symbolic seed state identification with {len(candidate_states)} candidates and {avoid} avoid states.") + + seeds: list[BooleanSpace] = [] + sets = [] + for (i, candidate) in enumerate(candidate_states_reduced): + is_last = (i == len(candidate_states_reduced) - 1) + is_minimal = len(child_motifs_reduced) == 0 + if seeds_only and is_minimal and is_last and len(seeds) == 0: + # This node is pseudo-minimal, so it must contain at least + # one attractor seed. If we only care about the attractor seeds, + # we can thus return the last candidate without checking it. + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Single seed remaining in a (pseduo) minimal space. 
Done.") + return ([candidate | node_space], None) + candidate_singleton = graph_reduced.mk_subspace(candidate) + + avoid = avoid.minus(candidate_singleton) + + closure = symbolic_attractor_test( + node_id, + graph_reduced, + candidate, + avoid + ) + + if closure is None: + # This candidate can reach someone else in the candidate set, + # or they can reach one of the child spaces. Hence it is not + # an attractor. + continue + + # Otherwise, we have an attractor set and we can save it. + # It also becomes a part of the avoid set. + avoid = avoid.union(closure) + seeds.append(candidate | node_space) + sets.append(closure) + + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Finished identification with {len(seeds)} seed states.") + + space_symbolic = sd.symbolic.mk_subspace(node_space).vertices() + sets_converted = [] + for s in sets: + # Extend the attractor set with fixed nodes from the node space. + vertices = s.vertices() + vertices = sd.symbolic.transfer_from(vertices, graph_reduced) + vertices = vertices.intersect(space_symbolic) + sets_converted.append(vertices) + + return (seeds, sets_converted) + + +def symbolic_attractor_test( + node_id: int, + graph: AsynchronousGraph, + pivot: BooleanSpace, + avoid_set: ColoredVertexSet, +) -> ColoredVertexSet | None: + """ + Use symbolic reachability with saturation to compute the set of states + reachable from the given `pivot`, or `None` if the reachability procedure + intersects with any state in the `avoid` set. + + *The reason why we use `ColoredVertexSet` instead of `VertexSet` is mostly + a technicality that should be irelevant in biobalm, since we don't allow any + parameters outside of unknown inputs.* + """ + + if avoid_set.is_empty(): + avoid = None + else: + avoid = avoid_set + + reach_set = graph.mk_subspace(pivot) + + # The variables for which we already maintain that `reach_set` + # contains all reachable states. 
+ saturated_vars: list[VariableId] = [] + + # Variables where `reach_set` differs from the states in `avoid`. + # We should prioritize updating these variables, because they *need* + # to be updated if we are ever to reach `avoid`. + conflict_vars: list[VariableId] = [] + + # Populate conflict vars, assuming we have any. + if avoid is not None: + avoid_bdd = avoid.to_bdd() + for (name, val) in pivot.items(): + var = graph.find_network_variable(name) + bdd_var = graph.symbolic_context().find_network_bdd_variable(name) + assert var is not None and bdd_var is not None + incompatible = avoid_bdd.r_select({bdd_var: cast(Literal[0, 1], 1 - val)}) + if not incompatible.is_false(): + conflict_vars.append(var) + conflict_vars = sort_variable_list(conflict_vars) + + # Remaining network variables that are still relevant, but may not + # be necessary to reach `avoid`. + other_vars: list[VariableId] = [x for x in graph.network_variables() if x not in conflict_vars] + other_vars = sort_variable_list(other_vars) + + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Start symbolic reachability with {len(conflict_vars)} conflict variables and {len(other_vars)} other variables.") + + all_done = False + while not all_done: + all_done = True + + # Saturate reach set with currently selected variables. + saturation_done = False + while not saturation_done: + if avoid is not None and not avoid.intersect(reach_set).is_empty(): + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Discovered avoid state. 
Done.") + return None + + saturation_done = True + for var in saturated_vars: + successors = graph.var_post_out(var, reach_set) + if not successors.is_empty(): + reach_set = reach_set.union(successors) + saturation_done = False + if reach_set.symbolic_size() > 100_000 and biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Saturation({len(saturated_vars)}) Expanded reach_set: {reach_set}") + break + + # Once saturated, try to expand the saturated + # collection with either a conflict variable or + # other variable. + + # First try conflict vars, then other vars. + for var in conflict_vars + other_vars: + successors = graph.var_post_out(var, reach_set) + if not successors.is_empty(): + reach_set = reach_set.union(successors) + all_done = False + + # This is a bit wasteful but at this point it + # should be irrelevant for performance. + if var in conflict_vars: + conflict_vars.remove(var) + + if var in other_vars: + other_vars.remove(var) + + saturated_vars.append(var) + saturated_vars = sort_variable_list(saturated_vars) + + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Saturation({len(saturated_vars)}) Added saturation variable. {len(conflict_vars)} conflict and {len(other_vars)} other variables remaining.") + break + + if biobalm.succession_diagram.DEBUG: + print(f"[{node_id}] > Reachability completed with {reach_set}.") + + return reach_set + + +def sort_variable_list(variables: list[VariableId]): + return list(sorted(variables, reverse=True)) diff --git a/biobalm/motif_avoidant.py b/biobalm/motif_avoidant.py deleted file mode 100644 index 7786d61..0000000 --- a/biobalm/motif_avoidant.py +++ /dev/null @@ -1,430 +0,0 @@ -""" -A module which provides utility methods for detecting motif-avoidant attractors -within a terminal restriction space. 
-""" - -from __future__ import annotations - -import random -from functools import reduce -from typing import TYPE_CHECKING - -from networkx import DiGraph # type: ignore -from pypint import Goal, InMemoryModel # type:ignore - -from biobalm.petri_net_translation import place_to_variable -from biobalm.space_utils import dnf_function_is_true, remove_state_from_dnf -from biobalm.symbolic_utils import ( - function_eval, - function_is_true, - state_list_to_bdd, - state_to_bdd, -) -from biobalm.types import BooleanSpace - -if TYPE_CHECKING: - from biodivine_aeon import AsynchronousGraph, Bdd - - -def make_retained_set( - graph: AsynchronousGraph, - nfvs: list[str], - space: BooleanSpace, - child_spaces: list[BooleanSpace] | None = None, -) -> BooleanSpace: - """ - Calculate the retained set for a given `space`. - - The retained set is technically a space-like object that describes the - variables which have to be fixed in order for the network to lose all - complex attractors. However, note that this really means changing the update - functions. This is not a trap space that only contains fixed-points, - but a description of how the network must be modified to eliminate - all complex attractors in the given `space`. - - The construction guarantees that any complex attractor of the old - network will manifest as at least one fixed-point in the new network. But this - network modification is not implemented here. This method only generates - the necessary list of variables and values. - - Finally, note that the method uses a heuristic to select values - that should lead to the least amount of fixed-points in - the modified network. - - Parameters - ---------- - graph : AsynchronousGraph - The symbolic update functions stored as an `AsynchronousGraph` object - from the `biodivine_aeon` library. - nfvs : list[str] - The list of variables in the NFVS that is valid for a network - restricted to the given `space`. 
- space : BooleanSpace - A :class:`BooleanSpace` object describing the - current trap space. - child_spaces : list[BooleanSpace] | None, optional - A list of :class:`BooleanSpace` objects - describing the child spaces of the current node. Only attractors that - are not in the child spaces are considered. If no child spaces are - provided, then all attractors are considered. - - Returns - ------- - BooleanSpace - A :class:`BooleanSpace` object describing the - retained set. - """ - - if child_spaces is None: - child_spaces = [] - - # Initially, the retained set only contains the fixed values from the - # current node space (this elimiantes unnecessary Petri net transitions for - # values which we already proved are constant). - # - # In the following code, we then extend the retained set based on the - # model's NFVS and the current child spaces. - retained_set = space.copy() - - # First, if there are any child spaces present, we extend the retained set - # with the values from the one that has the least amount of fixed variables - # shared with the NFVS. - if len(child_spaces) > 0: - # Find the child space that has the fewest nodes in common with the NFVS: - least_common_child_space = child_spaces[0] - least_common_nodes = len(set(least_common_child_space) & set(nfvs)) - for child_space in child_spaces: - common_nodes = len(set(child_space) & set(nfvs)) - if common_nodes < least_common_nodes: - least_common_nodes = common_nodes - least_common_child_space = child_space - - for x in least_common_child_space: - if (x not in retained_set) and (x in nfvs): - retained_set[x] = least_common_child_space[x] - - # Then, set the remaining NFVS variables based on the majority output value - # in the update function of the relevant variable. - for x in nfvs: - if x in retained_set: - continue - - fn_bdd = graph.mk_update_function(x) - - # If most of the function is positive, we set the value - # to `1`, otherwise set it to `0`. 
- if fn_bdd.cardinality() > fn_bdd.l_not().cardinality(): - retained_set[x] = 1 - else: - retained_set[x] = 0 - - return retained_set - - -def detect_motif_avoidant_attractors( - graph: AsynchronousGraph, - petri_net: DiGraph, - candidates: list[BooleanSpace], - terminal_restriction_space: Bdd, - max_iterations: int, - is_in_an_mts: bool = False, -) -> list[BooleanSpace]: - """ - Determine which seed states from the `candidates` list map to true - motif-avoidant attractors. - - Assumes that all attractors in the `terminal_restriction_space` are - covered by at least one state in `candidates`. Eliminates all states - from `candidates` that are provably not in an attractor, or that correspond - to an attractor that is already covered by some other candidate. - - The method consists of two steps: initial pruning, and exact reachability - analysis. As such, the result is always complete. However, some of the - parameters can be used to adjust the initial pruning step. - - If `is_in_an_mts` is set, the method assumes the `candidates` are members - of a minimal trap space. In such case, a different simulation algorithm - is used for initial pruning. Furthermore, `max_iterations` can be used to - to limit the time spent on the initial pruning step. - - Parameters - ---------- - graph : AsynchronousGraph - The symbolic update functions stored as an `AsynchronousGraph` object - from the `biodivine_aeon` library. - petri_net : DiGraph - The Petri net representation of the update functions. - candidates : list[BooleanSpace] - A list of :class:`BooleanSpace` objects - describing the candidate seed states. - terminal_restriction_space : Bdd - A symbolic set of states which contains all motif avoidant attractors - in question (i.e. if a candidate state can leave this set, the candidate - cannot be an attractor). - max_iterations : int - Specifies how much time should be spent on the simpler preprocessing methods. - is_in_an_mts : bool, optional - By default `False`. 
If `True`, then we assume that the candidates lie - within a minimal trap space, enabling certain optimizations. If - incorrectly set to `True`, the result is undefined. - - Returns - ------- - list[BooleanSpace] - A list of :class:`BooleanSpace` objects - describing the motif-avoidant attractors among the input candidate set. - """ - if len(candidates) == 0: - return [] - - if len(candidates) == 1 and is_in_an_mts: - return candidates - - candidates = _preprocess_candidates( - graph, - candidates, - terminal_restriction_space, - max_iterations, - is_in_an_mts=is_in_an_mts, - ) - - if len(candidates) == 0: - return [] - - if len(candidates) == 1 and is_in_an_mts: - return candidates - - return _filter_candidates(petri_net, candidates, terminal_restriction_space) - - -def _preprocess_candidates( - graph: AsynchronousGraph, - candidates: list[BooleanSpace], - terminal_restriction_space: Bdd, - max_iterations: int, - is_in_an_mts: bool = False, - simulation_seed: int = 0, -) -> list[BooleanSpace]: - """ - A fast but incomplete method for eliminating spurious attractor candidates. - - The idea is to build the symbolic encoding of the given `network`, and then - randomly simulate transitions for individual states, trying to reach a state - outside of the `terminal_restriction_space`. - - TODO (2): We could probably make this algorithm slighlty less random by doing - a limited version of symbolic reachability. I.e. instead of simulating just one - state transition in each step, compute the whole successor BDD and then test - against that. Once the BDD becomes too large after several steps, we can just - pick a single state from it and start again. Sam: I'll add a version of this - later, once we can actually benchmark how it performs :) - """ - # A random generator initialized with a fixed seed. Ensures simulation - # is randomized but deterministic. 
- generator = random.Random(simulation_seed) - - variables = graph.network_variable_names() - update_functions = {var: graph.mk_update_function(var) for var in variables} - - # Use stochastic simulation to prune the set of candidate states. - # We use different simulation approach depending on whether this space - # is a minimal trap or not. In previous work, this was shown to work - # well, but in the future we need to better document the resoning - # behind these two algorithms. - if not is_in_an_mts: - # Copy is sufficient because we won't be modifying the states within the set. - candidates_dnf = candidates.copy() - filtered_candidates: list[BooleanSpace] = [] - for state in candidates: - # Remove the state from the candidates. If we can prove that is - # is not an attractor, we will put it back. - candidates_dnf = remove_state_from_dnf(candidates_dnf, state) - - simulation = state.copy() # A copy of the state that we can overwrite. - is_valid_candidate = True - for _ in range(max_iterations): - # Advance all variables by one step in random order. - generator.shuffle(variables) - for var in variables: - step = function_eval(update_functions[var], simulation) - assert step is not None - simulation[var] = step - - if dnf_function_is_true(candidates_dnf, simulation): - # The state can reach some other state in the candidate - # set. This does not mean it cannot be an attractor, but - # it means it is sufficient to keep considering - # the remaining candidates. - is_valid_candidate = False - break - - if not function_is_true(terminal_restriction_space, simulation): - # The state can reach some other state outside of the - # terminal restriction space, which means it cannot be - # a motif avoidant attractor in this subspace. - is_valid_candidate = False - break - - if is_valid_candidate: - # If we cannot rule out the candidate, we have to put it back - # into the candidate set. 
- candidates_dnf.append(state) - filtered_candidates.append(state) - - return filtered_candidates - else: - filtered_candidates = [] - for _ in range(max_iterations): - generator.shuffle(variables) - candidates_dnf = candidates.copy() - filtered_candidates = [] - - for state in candidates: - candidates_dnf = remove_state_from_dnf(candidates_dnf, state) - - simulation = state.copy() - for var in variables: - step = function_eval(update_functions[var], simulation) - assert step is not None - simulation[var] = step - - if not dnf_function_is_true(candidates_dnf, simulation): - candidates_dnf.append(simulation) - filtered_candidates.append(simulation) - - if len(filtered_candidates) <= 1: - break - - candidates = filtered_candidates - - return filtered_candidates - - -def _filter_candidates( - petri_net: DiGraph, - candidates: list[BooleanSpace], - terminal_restriction_space: Bdd, -) -> list[BooleanSpace]: - """ - Filter candidate states using reachability procedure in Pint. - """ - - ctx = terminal_restriction_space.__ctx__() - candidates_bdd = state_list_to_bdd(ctx, candidates) - avoid_states = candidates_bdd.l_and_not(terminal_restriction_space) - filtered_candidates: list[BooleanSpace] = [] - - for state in candidates: - state_bdd = state_to_bdd(ctx, state) - - # Remove state from the symbolic set. If we can prove that it - # is not an attractor, we will put it back. - avoid_states = avoid_states.l_and_not(state_bdd) - - if not _Pint_reachability(petri_net, state, avoid_states): - avoid_states = avoid_states.l_or(state_bdd) - filtered_candidates.append(state) - - return filtered_candidates - - -def _Pint_reachability( - petri_net: DiGraph, - initial_state: BooleanSpace, - target_states: Bdd, -) -> bool: - """ - Use Pint to check if a given `initial_state` can possibly reach some state - in the `target_states` BDD. - - TODO: Here, if the result of static analysis is inconclusive, Pint falls back to `mole` - model checker. 
However, in the future, we might also explore other methods, such as - petri net reduction or symbolic reachability. - """ - if target_states.is_false(): - return False # Cannot reach a stat in an empty set. - - # Build a Pint model through an automata network and copy - # over the initial condition. - pint_model = InMemoryModel(_petri_net_as_automata_network(petri_net)) - for var, level in initial_state.items(): - pint_model.initial_state[var] = level - - goal = _Pint_build_symbolic_goal(target_states) - - return pint_model.reachability(goal=goal, fallback="mole") # type: ignore - - -def _Pint_build_symbolic_goal(states: Bdd) -> Goal: - """ - A helper method which (very explicitly) converts a set of states - represented through a BDD into a Pint `Goal`. - """ - assert not states.is_false() - - goals: list[Goal] = [] - for clause in states.clause_iterator(): - named_clause = { - states.__ctx__().get_variable_name(var): int(value) - for var, value in clause.items() - } - goal_atoms = [f"{var}={level}" for var, level in named_clause.items()] - goals.append(Goal(",".join(goal_atoms))) - - return reduce(lambda a, b: a | b, goals) - - -def _petri_net_as_automata_network(petri_net: DiGraph) -> str: - """ - Takes a Petri net which was created by implicant encoding from a Boolean network, - and builds an automata network file (`.an`) compatible with the Pint tool. - - TODO: This is one of those things that would probably be better served by having - an "explicit" `PetriNetEncoding` class. - """ - automata_network = "" - - # Go through all PN places and save them as model variables. - variable_set: set[str] = set() - for place, kind in petri_net.nodes(data="kind"): # type: ignore - if kind != "place": - continue - variable_set.add(place_to_variable(place)[0]) # type: ignore[reportUnknownArgumentType] # noqa - variables = sorted(variable_set) - - # Declare all variables with 0/1 domains. 
- for var in variables: - automata_network += f'"{var}" [0, 1]\n' - - for transition, kind in petri_net.nodes(data="kind"): # type: ignore - if kind != "transition": - continue - - predecessors = set(petri_net.predecessors(transition)) # type: ignore - successors = set(petri_net.successors(transition)) # type: ignore - - # The value under modification is the only - # value that is different between successors and predecessors. - source_place = next(iter(predecessors - successors)) # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa - target_place = next(iter(successors - predecessors)) # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa - - (s_var, s_level) = place_to_variable(source_place) # type: ignore[reportUnknownArgumentType] # noqa - (t_var, t_level) = place_to_variable(target_place) # type: ignore[reportUnknownArgumentType] # noqa - assert s_var == t_var - - # The remaining places represent the necessary conditions. - # Here, we transform them into a text format. - conditions = sorted(predecessors.intersection(successors)) # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa - conditions = [place_to_variable(p) for p in conditions] # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa - conditions = [f'"{var}"={int(level)}' for var, level in conditions] - - # A pint rule consists of a variable name, value transition, - # and a list of necessary conditions for the transition (if any). 
- if len(conditions) == 0: - rule = f'"{s_var}" {int(s_level)} -> {int(t_level)}\n' - else: - rule = f"\"{s_var}\" {int(s_level)} -> {int(t_level)} when {' and '.join(conditions)}\n" - - automata_network += rule - - return automata_network diff --git a/biobalm/petri_net_translation.py b/biobalm/petri_net_translation.py index cae55d2..261daee 100644 --- a/biobalm/petri_net_translation.py +++ b/biobalm/petri_net_translation.py @@ -205,6 +205,9 @@ def restrict_petrinet_to_subspace( percolation. Variables that are fixed in the `sub_space` but do not exist in the Petri net are ignored. + The `sub_space` can contain variables that do not appear + in the `petri_net`. Such variables are simply ignored. + Parameters ---------- petri_net : DiGraph diff --git a/biobalm/succession_diagram.py b/biobalm/succession_diagram.py index 24c4278..f9c84ee 100644 --- a/biobalm/succession_diagram.py +++ b/biobalm/succession_diagram.py @@ -6,15 +6,20 @@ from typing import Iterator import networkx as nx # type: ignore -from biodivine_aeon import AsynchronousGraph, BooleanNetwork, VariableId +from biodivine_aeon import AsynchronousGraph, BooleanNetwork, VariableId, VertexSet -from biobalm._sd_algorithms.compute_attractor_seeds import compute_attractor_seeds +# Attractor detection algorithms. +from biobalm._sd_attractors.attractor_candidates import compute_attractor_candidates +from biobalm._sd_attractors.attractor_symbolic import compute_attractors_symbolic + +# SD expansion algorithms/heuristics. 
from biobalm._sd_algorithms.expand_attractor_seeds import expand_attractor_seeds from biobalm._sd_algorithms.expand_bfs import expand_bfs from biobalm._sd_algorithms.expand_dfs import expand_dfs from biobalm._sd_algorithms.expand_minimal_spaces import expand_minimal_spaces from biobalm._sd_algorithms.expand_source_SCCs import expand_source_SCCs from biobalm._sd_algorithms.expand_to_target import expand_to_target + from biobalm.interaction_graph_utils import ( cleanup_network, feedback_vertex_set, @@ -84,6 +89,16 @@ class SuccessionDiagram: for smaller networks, the trade-off is in favor of computing a smaller NFVS. """ + MAX_MOTIFS_PER_NODE = 100_000 + """ + An artificial limit on the number of stable motifs that can be + considered during the expansion a single node. + + This limit is in place mainly to avoid surprising out of memory errors, + because currently there is no logging mechanism that would report the + number of stable motifs gradually. + """ + __slots__ = ( "network", "symbolic", @@ -97,12 +112,12 @@ def __init__(self, network: BooleanNetwork): # Original Boolean network. self.network: BooleanNetwork = cleanup_network(network) """ - The Boolean network as an AEON network. + The Boolean network represented as a `biodivine_aeon.BooleanNetwork` object. """ self.symbolic: AsynchronousGraph = AsynchronousGraph(self.network) """ - The symbolic representation of the network. + The symbolic representation of the network using `biodivine_aeon.AsynchronousGraph`. """ self.petri_net: nx.DiGraph = network_to_petrinet(network) @@ -115,10 +130,9 @@ def __init__(self, network: BooleanNetwork): f"Generated global Petri net with {len(self.petri_net.nodes)} nodes and {len(self.petri_net.edges)} edges." ) - # Initially, we don't need the NFVS for anything. self.nfvs: list[str] | None = None """ - The negative feedback vertex set used for attractor detection. + The negative feedback vertex set of `SuccessionDiagram.network`, or `None` if not computed yet. 
""" self.dag: nx.DiGraph = nx.DiGraph() @@ -126,8 +140,7 @@ def __init__(self, network: BooleanNetwork): The directed acyclic graph (DAG) representation of the succession diagram structure. """ - # A dictionary used for uniqueness checks on the nodes of the succession - # diagram. See `SuccessionDiagram.ensure_node` for details. + self.node_indices: dict[int, int] = {} """ A dictionary mapping subspace keys to their positions in the succession @@ -200,15 +213,78 @@ def from_file(path: str) -> SuccessionDiagram: """ return SuccessionDiagram(BooleanNetwork.from_file(path)) + def expanded_attractor_candidates(self) -> dict[int, list[BooleanSpace]]: + """ + Attractor candidates for each expanded node. The candidate list is + computed for nodes where it is not known yet. + + Collectively, for every attractor in every expanded node, this returns + at least one candidate state for said attractor. However, for complex + attractors, there can be more than one candidate state, and there can + be candidates that are not contained in any attractor. + + *If called before the `SuccessionDiagram` is fully built, this will not + be a complete accounting of attractors, since any node that isn't expanded + is not included in the result.* + + See also: + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + + Returns + ------- + dict[int,list[BooleanSpace]] + Each key is the id of an expanded succession diagram node, whose + corresponding value is a list of attractor candidates for that node. Note + that one succession diagram node can have multiple attractors. Ordering + of the lists in the returned dictionary is not guaranteed. + + Example + ------- + >>> import biobalm + >>> sd = biobalm.SuccessionDiagram.from_rules(\""" + ... A, B + ... B, A & C + ... C, !A | B + ... 
\""") + >>> sd.build() + >>> eas = sd.expanded_attractor_candidates() + >>> for id, atts in sorted(eas.items()): + ... for x in atts: + ... print(f"{id}: {dict(sorted(x.items()))}") + 1: {'A': 0, 'B': 0, 'C': 1} + 2: {'A': 1, 'B': 1, 'C': 1} + """ + res: dict[int, list[BooleanSpace]] = {} + for id in self.expanded_ids(): + atts = self.node_attractor_candidates(id, compute=True) + if not atts: # no attractors for this node + continue + res[id] = atts + + return res + def expanded_attractor_seeds(self) -> dict[int, list[BooleanSpace]]: """ - Attractor seeds for each expanded node. + Attractor seeds for each expanded node. The seed list is + computed for nodes where it is not known yet. + + Collectively, for every attractor in every expanded node, this returns + exactly one seed state for said attractor. - An attractor seed is a state belonging to an attractor (and thus a state - from which the entire attractor is reachable, by definition). + *If called before the `SuccessionDiagram` is fully built, this will not + be a complete accounting of attractors, since any node that isn't expanded + is not included in the result.* - If called before the `SuccessionDiagram` is fully built, this will not - be a complete accounting of attractor seed states. + See also: + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` Returns ------- @@ -220,6 +296,10 @@ def expanded_attractor_seeds(self) -> dict[int, list[BooleanSpace]]: Example ------- + + *Note that for this simple network, attractor candidates and attractor + seeds are the same states.* + >>> import biobalm >>> sd = biobalm.SuccessionDiagram.from_rules(\""" ... 
A, B @@ -236,7 +316,62 @@ def expanded_attractor_seeds(self) -> dict[int, list[BooleanSpace]]: """ res: dict[int, list[BooleanSpace]] = {} for id in self.expanded_ids(): - atts = self.node_attractor_seeds(id) + atts = self.node_attractor_seeds(id, compute=True) + if not atts: # no attractors for this node + continue + res[id] = atts + + return res + + def expanded_attractor_sets(self) -> dict[int, list[VertexSet]]: + """ + Attractor sets for each expanded node. The sets are + computed for nodes where they are not known yet. + + These sets represent the complete collection of attractor states in each + expanded node. For fixed-point attractors, this is effectively equivalent + to the attractor seeds. For complex attractors, this set contains the + full attractor. Hence, it is harder to compute but can facilitate richer + post-processing and analysis. + + *If called before the `SuccessionDiagram` is fully built, this will not + be a complete accounting of attractors, since any node that isn't expanded + is not included in the result.* + + See also: + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + - :meth:`expanded_attractor_seeds` + + Returns + ------- + dict[int,list[biodivine_aeon.VertexSet]] + Each key is the id of an expanded succession diagram node, whose + corresponding value is a list of attractor sets for that node. Note + that one succession diagram node can have multiple attractors. Ordering + of the lists in the returned dictionary is not guaranteed. + + Example + ------- + >>> import biobalm + >>> sd = biobalm.SuccessionDiagram.from_rules(\""" + ... A, B + ... B, A & C + ... C, !A | B + ... \""") + >>> sd.build() + >>> eas = sd.expanded_attractor_sets() + >>> for id, atts in sorted(eas.items()): + ... for x in atts: + ... 
print(f"{id}: {x}") + 1: VertexSet(cardinality=1, symbolic_size=5) + 2: VertexSet(cardinality=1, symbolic_size=5) + """ + res: dict[int, list[VertexSet]] = {} + for id in self.expanded_ids(): + atts = self.node_attractor_sets(id, compute=True) if not atts: # no attractors for this node continue res[id] = atts @@ -434,10 +569,19 @@ def node_data(self, node_id: int) -> NodeData: Returns a `NodeData` object with the following attributes: - `depth`: The depth of the node. - - `attractors`: The attractors of the node (if computed). - - `petri_net`: The Petri net representation of the node (if computed). - `space`: The sub-space of the node. - `expanded`: Whether the node is expanded or not. + - `percolated_network`: [`None` if not computed] The percolation of + `SuccessionDiagram.network` with respect to the node's `space`. + - `percolated_petri_net`: [`None` if not computed] The percolation of + `SuccessionDiagram.petri_net` with respect to the node's `space`. + - `percolated_nfvs`: [`None` if not computed] The NFVS of `percolated_network`. + - `attractor_candidates`: [`None` if not computed] A collection of states + that collectively cover every attractor of this node. + - `attractor_seeds`: [`None` if not computed] A collection of states + one-to-one corresponding to the attractors of this node. + - `attractor_sets`: [`None` if not computed] A complete collection of + attractors of this node. See :class:`biobalm.types.NodeData` for more information. @@ -454,6 +598,29 @@ def node_data(self, node_id: int) -> NodeData: """ return cast(NodeData, self.dag.nodes[node_id]) + def reclaim_node_data(self): + """ + Removes non-essential data from the `NodeData` dictionary of each node. + + This method can be used to reduce the memory footprint of the succession + diagram, especially before serialization. However, note that this can + also slow down subsequent computations if the erased data needs + to be re-computed. 
+
+        The method removes the `percolated_network`, `percolated_petri_net`,
+        and the `percolated_nfvs`. Furthermore, if `attractor_seeds` are known,
+        it erases the `attractor_candidates`, since seeds can be used for the
+        same tasks.
+        """
+
+        for node_id in self.node_ids():
+            data = self.node_data(node_id)
+            data["percolated_network"] = None
+            data["percolated_petri_net"] = None
+            data["percolated_nfvs"] = None
+            if data["attractor_seeds"] is not None:
+                data["attractor_candidates"] = None
+
     def node_is_minimal(self, node_id: int) -> bool:
         """
         True if the node represents a minimal trap space.
@@ -505,7 +672,7 @@ def node_successors(self, node_id: int, compute: bool = False) -> list[int]:
         list[int]
             The list of successor node ids.
         """
-        node = cast(dict[str, Any], self.dag.nodes[node_id])
+        node = self.node_data(node_id)
         if not node["expanded"] and not compute:
             raise KeyError(f"Node {node_id} is not expanded.")
         if not node["expanded"]:
@@ -513,8 +680,88 @@
 
         return list(self.dag.successors(node_id))  # type: ignore
 
+    def node_attractor_candidates(
+        self,
+        node_id: int,
+        compute: bool = False,
+        greedy_asp_minification: bool = True,
+        simulation_minification: bool = True,
+        pint_minification: bool = False,
+    ) -> list[BooleanSpace]:
+        """
+        Return the list of attractor candidate states for the given `node_id`.
+
+        If attractor candidates are not computed but seeds are, returns
+        attractor seeds, as these are also valid as candidate states, but
+        even more precise.
+
+        Similar to :meth:`node_successors`, the method either computes the
+        data if unknown, or throws an exception, depending on the `compute`
+        flag. If `compute` is set to `True`, additional flags can be used
+        to adjust the candidate identification process (see *Parameters*).
+ + Note that you can compute attractor candidates for nodes that are not expanded, + but (a) multiple unexpanded nodes can contain the same attractor, and hence also + the same/similar candidates (i.e. you can "discover" the same attractor in multiple + unexpanded nodes, if the nodes intersect), and (b) this data is erased if the + node is later expanded. + + Parameters + ---------- + node_id: int + The ID of the node. + compute: bool + Whether to compute the attractor candidates if they are not already known. + greedy_asp_minification: bool + Indicate that the initial candidate set should be first greedily minified + through repeated ASP queries. [Default: True] + simulation_minification: bool + Indicate that the candidate set should be refined through stochastic + simulation. [Default: True] + pint_minification: bool + Indicate that the candidate set should be refined through reachability + analysis using `pint`. Only enable this option if you actually have `pint` + installed (it is an optional dependency). [Default: False] + + Returns + ------- + list[BooleanSpace] + The list of attractor candidate states. + """ + node = self.node_data(node_id) + + candidates = node["attractor_candidates"] + + # If candidates are already cleared, but seeds are known, we can + # just use seeds. + if candidates is None and node["attractor_seeds"] is not None: + return node["attractor_seeds"] + + if candidates is None and not compute: + raise KeyError(f"Attractor candidates not computed for node {node_id}.") + + if candidates is None: + candidates = compute_attractor_candidates( + self, + node_id, + greedy_asp_minification, + simulation_minification, + pint_minification + ) + node["attractor_candidates"] = candidates + + # If the computed candidates are actually valid as seeds, just + # propagate the value so that it doesn't need to be computed later. 
+ node_is_pseudo_minimal = (not node["expanded"]) or self.node_is_minimal(node_id) + if len(candidates) == 0 or (node_is_pseudo_minimal and len(candidates) == 1): + node["attractor_seeds"] = candidates + + return candidates + def node_attractor_seeds( - self, node_id: int, compute: bool = False + self, + node_id: int, + compute: bool = False, ) -> list[BooleanSpace]: """ Return the list of attractor seed states for the given `node_id`. @@ -523,10 +770,8 @@ def node_attractor_seeds( data if unknown, or throws an exception, depending on the `compute` flag. - Note that you can compute attractor seeds for stub nodes, but (a) these - attractors are not guaranteed to be unique (i.e. you can "discover" the - same attractor in multiple stub nodes, if the stub nodes intersect), and - (b) this data is erased if the stub node is expanded later on. + Note that the same considerations regarding attractors in unexpanded + nodes apply as for :meth:`node_attractor_candidates`. Parameters ---------- @@ -540,29 +785,99 @@ def node_attractor_seeds( list[BooleanSpace] The list of attractor seed states. """ - node = cast(NodeData, self.dag.nodes[node_id]) + node = self.node_data(node_id) + + seeds = node["attractor_seeds"] + + if seeds is None and not compute: + raise KeyError(f"Attractor seeds not computed for node {node_id}.") + + if seeds is None: + candidates = self.node_attractor_candidates(node_id, compute=True) + # Typically, this should be done when computing the candidates, but just in case + # something illegal happended... if we can show that the current candidate set + # is optimal, we just keep it and don't compute the attractors symbolically. 
+ node_is_pseudo_minimal = (not node["expanded"]) or self.node_is_minimal(node_id) + if len(candidates) == 0 or (node_is_pseudo_minimal and len(candidates) == 1): + node["attractor_seeds"] = candidates + seeds = candidates + else: + result = compute_attractors_symbolic( + self, + node_id, + candidate_states=candidates, + seeds_only=True + ) + node["attractor_seeds"] = result[0] + # At this point, attractor_sets could be `None`, but that + # is valid, as long as we actually compute them later when + # they are needed. + node["attractor_sets"] = result[1] + seeds = result[0] + + return seeds + + def node_attractor_sets( + self, + node_id: int, + compute: bool = False, + ) -> list[VertexSet]: + """ + Return the list of attractor sets for the given `node_id`. - attractors = node["attractors"] + Similar to :meth:`node_successors`, the method either computes the + data if unknown, or throws an exception, depending on the `compute` + flag. - if attractors is None and not compute: - raise KeyError(f"Attractor data not computed for node {node_id}.") + Note that the same considerations regarding attractors in unexpanded + nodes apply as for :meth:`node_attractor_candidates`. - if attractors is None: - attractors = compute_attractor_seeds(self, node_id) - node["attractors"] = attractors + Parameters + ---------- + node_id: int + The ID of the node. + compute: bool + Whether to compute the attractor sets if they are not already known. - return attractors + Returns + ------- + list[biodivine_aeon.VertexSet] + The list of attractor sets. 
+ """ + node = self.node_data(node_id) - def node_nfvs(self, node_id: int) -> list[str]: + sets = node["attractor_sets"] + + if sets is None and not compute: + raise KeyError(f"Attractor sets not computed for node {node_id}.") + + if sets is None: + seeds = self.node_attractor_seeds(node_id, compute=True) + result = compute_attractors_symbolic(self, node_id, candidate_states=seeds) + assert result[1] is not None + node["attractor_sets"] = result[1] + sets = result[1] + + return sets + + def node_percolated_nfvs(self, node_id: int, compute: bool = False) -> list[str]: """ - Approximate minimum negative feedback vertex set on the subspace for the given node. + Approximate minimum negative feedback vertex set on the Boolean network + percolated to the node's sub-space. + + Similar to :meth:`node_successors`, the method either computes the + data if unknown, or throws an exception, depending on the `compute` + flag. - See :func:`biobalm.interaction_graph_utils.feedback_vertex_set` for further details. + See :func:`biobalm.interaction_graph_utils.feedback_vertex_set` for + further details. Parameters ---------- node_id: int The ID of the node. + compute: bool + Whether to compute the node NFVS if it is not already known. Returns ------- @@ -570,20 +885,136 @@ def node_nfvs(self, node_id: int) -> list[str]: The negative feedback vertex set, as a list of node names. """ - # TODO: Right now, we are just returning the gloval FVS. But in the future, - # we probably want to compute a separate FVS for each node, because it could - # be much smaller if a lot of the nodes are fixed. 
assert node_id in self.dag.nodes - if self.nfvs is None: - if self.network.variable_count() < SuccessionDiagram.NFVS_NODE_THRESHOLD: + node = self.node_data(node_id) + + if node["percolated_nfvs"] is None and not compute: + raise KeyError(f"NFVS not computed for node {node_id}.") + + if node["percolated_nfvs"] is None: + percolated_network = self.node_percolated_network(node_id, compute) + if percolated_network.variable_count() < SuccessionDiagram.NFVS_NODE_THRESHOLD: # Computing the *negative* variant of the FVS is surprisingly costly. # Hence it mostly makes sense for the smaller networks only. - self.nfvs = feedback_vertex_set(self.network, parity="negative") + nfvs = feedback_vertex_set(percolated_network, parity="negative") else: - self.nfvs = feedback_vertex_set(self.network) + nfvs = feedback_vertex_set(percolated_network) + node["percolated_nfvs"] = nfvs + else: + nfvs = node["percolated_nfvs"] + + return nfvs - return self.nfvs + def node_percolated_network(self, node_id: int, compute: bool = False) -> BooleanNetwork: + """ + The Boolean network percolated to the node's sub-space, with + constant variables removed. + + Similar to :meth:`node_successors`, the method either computes the + data if unknown, or throws an exception, depending on the `compute` + flag. + + Parameters + ---------- + node_id: int + The ID of the node. + compute: bool + Whether to compute the node NFVS if it is not already known. + + Returns + ------- + biodivine_aeon.BooleanNetwork + The percolated Boolean network. + """ + + assert node_id in self.dag.nodes + + node = self.node_data(node_id) + network = node["percolated_network"] + + node_space = node["space"] + + if len(node_space) == self.network.variable_count(): + # If fixed point, no need to compute, the network is always empty. 
+ return BooleanNetwork() + + if network is None and not compute: + raise KeyError(f"Percolated network not computed for node {node_id}.") + + if network is None: + network = percolate_network(self.network, node_space, self.symbolic, remove_constants=True) + if DEBUG: + print(f"[{node_id}] Computed percolated network with {network.variable_count()} variables (vs {self.network.variable_count()}).") + node["percolated_network"] = network + + return network + + def node_percolated_petri_net( + self, + node_id: int, + compute: bool = False, + parent_id: int | None = None, + ) -> nx.DiGraph: + """ + The Petri net representation of the Boolean network percolated to the + node's sub-space (with constant variables removed). + + Similar to :meth:`node_successors`, the method either computes the + data if unknown, or throws an exception, depending on the `compute` + flag. + + Parameters + ---------- + node_id: int + The ID of the node. + compute: bool + Whether to compute the node NFVS if it is not already known. + parent_id: int | None + If given, the percolation process starts with the Petri net of the given + parent node (if computed). If parent is not given, the percolation starts + with `SuccessionDiagram.petri_net`, which can be slower but yields the + same result. + + Returns + ------- + networkx.DiGraph + The percolated Boolean network. + """ + + assert node_id in self.dag.nodes + + node = self.node_data(node_id) + percolated_pn = node["percolated_petri_net"] + + node_space = node["space"] + + if len(node_space) == self.network.variable_count(): + # If fixed point, the result is always empty. 
+ return nx.DiGraph() + + if percolated_pn is None and not compute: + raise KeyError(f"Percolated network not computed for node {node_id}.") + + if percolated_pn is None: + base_pn = self.petri_net + percolate_space = node_space + if parent_id is not None: + parent_pn = self.node_data(parent_id)["percolated_petri_net"] + if parent_pn is not None: + base_pn = parent_pn + percolate_space = node_space + + percolated_pn = restrict_petrinet_to_subspace(base_pn, percolate_space) + + if DEBUG: + print( + f"[{node_id}] Generated Petri net restriction with {len(percolated_pn.nodes)} nodes and {len(percolated_pn.edges)} edges." + ) + + node["percolated_petri_net"] = percolated_pn + + return percolated_pn def edge_stable_motif( self, parent_id: int, child_id: int, reduced: bool = False @@ -826,52 +1257,6 @@ def _update_node_depth(self, node_id: int, parent_id: int): current_depth = cast(int, self.dag.nodes[node_id]["depth"]) self.dag.nodes[node_id]["depth"] = max(current_depth, parent_depth + 1) - def _update_node_petri_net(self, node_id: int, parent_id: int | None): - """ - Try to compute a restricted Petri net for this node using the Petri net - stored in the node's parent. - - If the parent Petri net does not exist or the parent is not specified, the - restriction is performed on the "global" Petri net object, which is usually - slower but should yield the same result. - - No Petri net is computed if the specified node is a fixed-point, since - such Petri net is always empty. - """ - - node_space = self.node_data(node_id)["space"] - - if len(node_space) == self.network.variable_count(): - # If fixed point, no need to compute. 
- return - - if parent_id is not None: - parent_pn = self.node_data(parent_id)["petri_net"] - if parent_pn is None: - pn = self.petri_net - else: - pn = parent_pn - else: - pn = self.petri_net - - restricted_pn = restrict_petrinet_to_subspace(pn, node_space) - - if DEBUG: - print( - f"[{node_id}] Generated Petri net restriction with {len(restricted_pn.nodes)} nodes and {len(restricted_pn.edges)} edges." - ) - - self.dag.nodes[node_id]["petri_net"] = restricted_pn - - def _clear_node_petri_net(self, node_id: int): - """ - Remove the computed Petri net for this node. - - This is typically done once the node is expanded, since we know the Petri net won't - be needed anymore. - """ - del self.dag.nodes[node_id]["petri_net"] - def _expand_one_node(self, node_id: int): """ An internal method that expands a single node of the succession diagram. @@ -889,7 +1274,13 @@ def _expand_one_node(self, node_id: int): return node["expanded"] = True - node["attractors"] = None + + # If the node had any attractor data computed as unexpanded, these are + # no longer valid and need to be erased. + node["attractor_seeds"] = None + node["attractor_candidates"] = None + node["attractor_sets"] = None + current_space = node["space"] if DEBUG: @@ -912,12 +1303,17 @@ def _expand_one_node(self, node_id: int): source_nodes = extract_source_variables(self.petri_net) sub_spaces: list[BooleanSpace] - pn = self.node_data(node_id)["petri_net"] + + # Only use the percolated PN if it is already known. + pn = self.node_percolated_petri_net(node_id, compute=False) if pn is not None: # We have a pre-propagated PN for this sub-space, hence we can use # that to compute the trap spaces. 
partial_sub_spaces = trappist( - pn, problem="max", optimize_source_variables=source_nodes + pn, + problem="max", + optimize_source_variables=source_nodes, + solution_limit=SuccessionDiagram.MAX_MOTIFS_PER_NODE, ) sub_spaces = [(s | current_space) for s in partial_sub_spaces] else: @@ -928,8 +1324,12 @@ def _expand_one_node(self, node_id: int): problem="max", ensure_subspace=current_space, optimize_source_variables=source_nodes, + solution_limit=SuccessionDiagram.MAX_MOTIFS_PER_NODE, ) + if len(sub_spaces) == SuccessionDiagram.MAX_MOTIFS_PER_NODE: + raise RuntimeError(f"Exceeded the maximum amount of stable motifs per node (SuccessionDiagram.MAX_MOTIFS_PER_NODE={SuccessionDiagram.MAX_MOTIFS_PER_NODE}).") + # Sort the spaces based on a unique key in case trappist is not always # sorted deterministically. sub_spaces = sorted( @@ -950,9 +1350,6 @@ def _expand_one_node(self, node_id: int): if DEBUG: print(f"[{node_id}] Created edge into node {child_id}.") - # The node is now fully expanded, hence we won't need the restricted PN anymore. 
- self._clear_node_petri_net(node_id) - def _ensure_node(self, parent_id: int | None, stable_motif: BooleanSpace) -> int: """ Internal method that ensures the provided node is present in this @@ -980,7 +1377,12 @@ def _ensure_node(self, parent_id: int | None, stable_motif: BooleanSpace) -> int space=fixed_vars, depth=0, expanded=False, - attractors=None, + percolated_network=None, + percolated_petri_net=None, + percolated_nfvs=None, + attractor_candidates=None, + attractor_seeds=None, + attractor_sets=None, ) self.node_indices[key] = child_id else: @@ -995,6 +1397,11 @@ def _ensure_node(self, parent_id: int | None, stable_motif: BooleanSpace) -> int self.dag.add_edge(parent_id, child_id, motif=stable_motif) # type: ignore self._update_node_depth(child_id, parent_id) - self._update_node_petri_net(child_id, parent_id) + # Compute the percolated petri net here, because we know the parent node ID + # and using its already percolated petri net helps a lot. + # + # (For percolated network and nfvs, knowing the parent ID is not as useful, + # so the parent is not used and we can just compute them lazily) + self.node_percolated_petri_net(child_id, compute=True, parent_id=parent_id) return child_id diff --git a/biobalm/symbolic_utils.py b/biobalm/symbolic_utils.py index 5717469..f6cb78b 100644 --- a/biobalm/symbolic_utils.py +++ b/biobalm/symbolic_utils.py @@ -4,14 +4,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal, cast if TYPE_CHECKING: - from typing import Literal - from biodivine_aeon import Bdd, SymbolicContext -from biodivine_aeon import BddVariableSet +from biodivine_aeon import BddVariableSet, BddValuation from biobalm.types import BooleanSpace @@ -128,3 +126,29 @@ def function_is_true(f: Bdd, state: BooleanSpace) -> bool: `True` if the function evaluates to `1` in the given state. 
""" return function_eval(f, state) == 1 + + +def valuation_to_state(ctx: SymbolicContext, valuation: BddValuation) -> BooleanSpace: + """ + Extract network state from a `BddValuation` into a `BooleanSpace`. + + Parameters + ---------- + f : SymbolicContext + A context which maps between network variables and their symbolic counterparts. + state : BddValuation + A valuation of the network's symbolic encoding. + + Returns + ------- + BooleanSpace + A `BooleanSpace` encoding the state data from the given valuation. + """ + + result: BooleanSpace = {} + for (var, val) in valuation.items(): + n_var = ctx.find_network_variable(var) + if n_var is not None: + result[ctx.get_network_variable_name(n_var)] = cast(Literal[0, 1], int(val)) + + return result diff --git a/biobalm/types.py b/biobalm/types.py index d4eb95e..9f5692f 100644 --- a/biobalm/types.py +++ b/biobalm/types.py @@ -3,6 +3,7 @@ from typing import Literal, TypeAlias, TypedDict import networkx as nx # type: ignore +import biodivine_aeon as ba BooleanSpace: TypeAlias = dict[str, Literal[0, 1]] """Type alias for `dict[str, Literal[0, 1]]`. Represents a Boolean subspace, which is defined by a set of fixed node values.""" @@ -61,17 +62,6 @@ class NodeData(TypedDict): the longest is used. """ - attractors: list[BooleanSpace] | None - """ - Attractor seed states for the node. If `None`, these have not been - computed, and the node may or may not have associated attractors. - """ - - petri_net: nx.DiGraph | None - """ - The Petri net representation of the node. If `None`, this has not been - computed yet. - """ space: BooleanSpace """ The sub-space that the node represents (this subspace will always be a @@ -87,3 +77,79 @@ class NodeData(TypedDict): If `expanded=True`, the node must have all its successors computed and included in the succession diagram. """ + + percolated_network: ba.BooleanNetwork | None + """ + The AEON `BooleanNetwork` that has variables fixed and percolated + according to the node `space`. 
Constant variables are then fully + eliminated. + + If `None`, this has not been computed yet (`SuccessionDiagram.network` can be + often used instead). + """ + + percolated_petri_net: nx.DiGraph | None + """ + The Petri net representation of the network rules percolated to the + node's sub-space (i.e. a Petri net encoding of the `percolated_network`). + Constant variables are fully eliminated. + + If `None`, this has not been computed yet (`SuccessionDiagram.petri_net` can be + often used instead). + """ + + percolated_nfvs: list[str] | None + """ + An NFVS of the `percolated_network`. + + Note that this contains no variables that are fixed by the node `space`. + Also, there is no guarantee that this is a subset of `SuccessionDiagram.nfvs`. + + If `None`, this has not been computed yet (`SuccessionDiagram.nfvs` can be + often used instead). + """ + + attractor_candidates: list[BooleanSpace] | None + """ + List of states that cover all network attractors in the node's sub-space + (excluding child nodes, if known). + + Note that each complex/cyclic attractor can be covered by more than + one state, but each attractor has to be covered by at least one state. Furthermore, + outside of minimal trap spaces, some candidate states can cover zero attractors. + That is, even if the candidate set is not empty, it does not guarantee that + the non-minimal sub-space contains attractors. + + If `None`, these have not been computed, and the node may or may not + have associated attractors. + """ + + attractor_seeds: list[BooleanSpace] | None + """ + List of states that one-to-one correspond to network attractors in the node's + sub-space (excluding child nodes, if known). + + This is very similar to `attractor_candidates`, but here, it is guaranteed + that each attractor is represented by exactly one state. + + If `None`, these have not been computed, and the node may or may not + have associated attractors. 
+ """ + + attractor_sets: list[ba.VertexSet] | None + """ + List of attractors that are present in the node's sub-space (excluding + child nodes, if known). + + Each attractor is represented symbolically using `biodivine_aeon.VertexSet`. + + Note that for fixed-point attractors, the attractor set is effectively + equivalent to the attractor seed. However, for complex attractors, this + set containes *all* the attractor states as opposed to just one. Hence it + is harder to compute, but can be used to analyze things like the average + value of each variable in the attractor states, or the presence of + particular oscillation patterns. + + If `None`, these have not been computed, and the node may or may not + have associated attractors. + """ diff --git a/pyproject.toml b/pyproject.toml index a293882..c7dc6e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,17 +6,20 @@ readme = "README.md" license = { file = "LISENSE" } requires-python = ">=3.11" dependencies = [ - 'biodivine_aeon ==1.0.0a4', + 'biodivine_aeon ==1.0.0a6', 'clingo >=5.6.2', - 'networkx>=2.8.8', - 'pypint>=1.6.2', + 'networkx >=2.8.8', ] version = "0.1.1" +# Pint can be used for static analysis, but is not mandatory. +[project.optional-dependencies] +pint = [ + "pypint==1.6.2" +] [tool.setuptools] -packages = ['biobalm', 'biobalm._sd_algorithms'] - +packages = ['biobalm', 'biobalm._sd_algorithms', 'biobalm._sd_attractors'] [tool.pyright] include = ["biobalm", "tests"] diff --git a/requirements.txt b/requirements.txt index 1bf8f37..d1b58b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,4 @@ -biodivine_aeon==1.0.0a4 +biodivine_aeon==1.0.0a6 clingo==5.6.2 networkx==2.8.8 -pypint==1.6.2 -# This will be eventually required by pandas (which is required by pint), -# but for now it prevents pandas from printing warnings all over the place. 
-pyarrow==15.0.0 \ No newline at end of file +pypint==1.6.2 \ No newline at end of file diff --git a/tests/motif_avoidant_test.py b/tests/motif_avoidant_test.py.old similarity index 100% rename from tests/motif_avoidant_test.py rename to tests/motif_avoidant_test.py.old From ec83587b5c8c54c1378647af07c6f89330421d0a Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 11 Apr 2024 09:38:35 +0200 Subject: [PATCH 05/18] Formatting. --- biobalm/_pint_reachability.py | 9 +- biobalm/_sd_algorithms/expand_source_SCCs.py | 8 +- .../_sd_attractors/attractor_candidates.py | 138 +++++++++++++----- biobalm/_sd_attractors/attractor_symbolic.py | 48 +++--- biobalm/succession_diagram.py | 44 ++++-- biobalm/symbolic_utils.py | 2 +- 6 files changed, 173 insertions(+), 76 deletions(-) diff --git a/biobalm/_pint_reachability.py b/biobalm/_pint_reachability.py index e526f87..d8e05e1 100644 --- a/biobalm/_pint_reachability.py +++ b/biobalm/_pint_reachability.py @@ -14,7 +14,10 @@ from networkx import DiGraph # type: ignore from pypint import Goal, InMemoryModel # type:ignore -from biobalm.petri_net_translation import place_to_variable, _optimized_recursive_dnf_generator +from biobalm.petri_net_translation import ( + place_to_variable, + _optimized_recursive_dnf_generator, +) from biobalm.types import BooleanSpace import biobalm @@ -90,7 +93,9 @@ def _pint_build_symbolic_goal(states: Bdd) -> Goal: # break here and don't continue. This is not ideal but I'm not sure # how to fix this other than modifying `pint` itself. if biobalm.succession_diagram.DEBUG: - print("WARNING: `pint` goal size limit exceeded. A partial goal is used.") + print( + "WARNING: `pint` goal size limit exceeded. A partial goal is used." 
+ ) break goal_atoms = [f"{var}={level}" for var, level in named_clause.items()] diff --git a/biobalm/_sd_algorithms/expand_source_SCCs.py b/biobalm/_sd_algorithms/expand_source_SCCs.py index 6214581..55ca8c0 100644 --- a/biobalm/_sd_algorithms/expand_source_SCCs.py +++ b/biobalm/_sd_algorithms/expand_source_SCCs.py @@ -77,7 +77,9 @@ def expand_source_SCCs( next_level.append(sd._ensure_node(root, sub_space)) # type: ignore sd.node_data(root)["expanded"] = True - sd.node_data(root)["attractor_seeds"] = [] # no need to look for attractors here + sd.node_data(root)[ + "attractor_seeds" + ] = [] # no need to look for attractors here current_level = next_level next_level = [] @@ -132,7 +134,9 @@ def expand_source_SCCs( for node_id in final_level: # These assertions should be unnecessary, but just to be sure. assert not sd.node_data(node_id)["expanded"] # expand nodes from here - assert sd.node_data(node_id)["attractor_seeds"] is None # check attractors from here + assert ( + sd.node_data(node_id)["attractor_seeds"] is None + ) # check attractors from here # restore this once we allow all expansion algorithms to expand from a node # expander(sd, node_id) diff --git a/biobalm/_sd_attractors/attractor_candidates.py b/biobalm/_sd_attractors/attractor_candidates.py index 2eb4f40..fdc52c5 100644 --- a/biobalm/_sd_attractors/attractor_candidates.py +++ b/biobalm/_sd_attractors/attractor_candidates.py @@ -119,7 +119,9 @@ def compute_attractor_candidates( if biobalm.succession_diagram.DEBUG: root_nfvs = sd.node_percolated_nfvs(sd.root(), compute=True) - print(f"[{node_id}] > Percolated node NFVS contains {len(node_nfvs)} nodes (vs {len(root_nfvs)} in the root).") + print( + f"[{node_id}] > Percolated node NFVS contains {len(node_nfvs)} nodes (vs {len(root_nfvs)} in the root)." + ) # Compute the list of child stable-motifs if the node is expanded. Otherwise # "pretend" that there are no children. 
@@ -130,14 +132,18 @@ def compute_attractor_candidates( child_motifs_reduced = [] if node_data["expanded"]: children = sd.node_successors(node_id, compute=False) - child_motifs_reduced = [sd.edge_stable_motif(node_id, s, reduced=True) for s in children] + child_motifs_reduced = [ + sd.edge_stable_motif(node_id, s, reduced=True) for s in children + ] # Indicates that this space is either minimal, or has no computed successors. # In either case, the space must contain at least one attractor. node_is_pseudo_minimal = len(child_motifs_reduced) == 0 if biobalm.succession_diagram.DEBUG and node_is_pseudo_minimal: - print(f"[{node_id}] > The node has no children (i.e. it is minimal or unexpanded).") + print( + f"[{node_id}] > The node has no children (i.e. it is minimal or unexpanded)." + ) if len(node_nfvs) == 0: # If the NFVS is empty, it means this node space has no complex attractors. @@ -157,7 +163,9 @@ def compute_attractor_candidates( if not node_is_pseudo_minimal: if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Attractor candidates done: empty NFVS in a non-minimal space.") + print( + f"[{node_id}] > Attractor candidates done: empty NFVS in a non-minimal space." + ) return [] # First, we create a retained set and corresponding candidate set that is valid in the @@ -182,13 +190,17 @@ def compute_attractor_candidates( bn_reduced = sd.node_percolated_network(node_id, compute=True) graph_reduced = AsynchronousGraph(bn_reduced) - retained_set = make_heuristic_retained_set(graph_reduced, node_nfvs, child_motifs_reduced) + retained_set = make_heuristic_retained_set( + graph_reduced, node_nfvs, child_motifs_reduced + ) if len(retained_set) == sd.network.variable_count() and node_is_pseudo_minimal: # If the retained set describes a fixed point, then only one attractor # is present in this space and it must contain the state described by the retained set. 
if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Singular attractor found through fixed-point retained set. Done.") + print( + f"[{node_id}] > Singular attractor found through fixed-point retained set. Done." + ) return [retained_set | node_space] if not greedy_asp_minification: @@ -196,27 +208,35 @@ def compute_attractor_candidates( pn_reduced, retained_set, avoid_subspaces=child_motifs_reduced, - solution_limit=CANDIDATES_HARD_LIMIT + solution_limit=CANDIDATES_HARD_LIMIT, ) if len(candidate_states) == CANDIDATES_HARD_LIMIT: - raise RuntimeError(f"Exceeded the maximum amount of attractor candidates (CANDIDATES_HARD_LIMIT={CANDIDATES_HARD_LIMIT}).") + raise RuntimeError( + f"Exceeded the maximum amount of attractor candidates (CANDIDATES_HARD_LIMIT={CANDIDATES_HARD_LIMIT})." + ) if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] Computed {len(candidate_states)} candidate states without retained set optimization.") + print( + f"[{node_id}] Computed {len(candidate_states)} candidate states without retained set optimization." + ) else: candidate_states = compute_fixed_point_reduced_STG( pn_reduced, retained_set, avoid_subspaces=child_motifs_reduced, - solution_limit=RETAINED_SET_OPTIMIZATION_THRESHOLD + solution_limit=RETAINED_SET_OPTIMIZATION_THRESHOLD, ) if len(candidate_states) < RETAINED_SET_OPTIMIZATION_THRESHOLD: # The candidate set is small. No need to overthink it. The original # retained set is probably quite good. However, we can still try to # optimize it further if it makes sense. - if len(candidate_states) > 1 or (not node_is_pseudo_minimal and len(candidate_states) > 0): + if len(candidate_states) > 1 or ( + not node_is_pseudo_minimal and len(candidate_states) > 0 + ): if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] Initial retained set generated {len(candidate_states)} candidates. Optimizing...") + print( + f"[{node_id}] Initial retained set generated {len(candidate_states)} candidates. Optimizing..." 
+ ) optimized = asp_greedy_retained_set_optimization( node_id, petri_net=pn_reduced, @@ -230,7 +250,9 @@ def compute_attractor_candidates( # There seem to be many candidates, in which case it might be better # to optimize the retained set dynamically. if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] Initial retained set generated >{RETAINED_SET_OPTIMIZATION_THRESHOLD} candidates. Regenerate retained set.") + print( + f"[{node_id}] Initial retained set generated >{RETAINED_SET_OPTIMIZATION_THRESHOLD} candidates. Regenerate retained set." + ) retained_set = {} candidate_states = [] for var in node_nfvs: @@ -239,12 +261,14 @@ def compute_attractor_candidates( pn_reduced, retained_set, avoid_subspaces=child_motifs_reduced, - solution_limit=CANDIDATES_HARD_LIMIT + solution_limit=CANDIDATES_HARD_LIMIT, ) if len(candidate_states_zero) <= len(candidate_states): if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] Chosen {var}=0 without increasing candidate count ({len(candidate_states_zero)}). {len(retained_set)}/{len(node_nfvs)} variables chosen.") + print( + f"[{node_id}] Chosen {var}=0 without increasing candidate count ({len(candidate_states_zero)}). {len(retained_set)}/{len(node_nfvs)} variables chosen." + ) candidate_states = candidate_states_zero continue @@ -253,26 +277,37 @@ def compute_attractor_candidates( pn_reduced, retained_set, avoid_subspaces=child_motifs_reduced, - solution_limit=len(candidate_states_zero) + solution_limit=len(candidate_states_zero), ) - if len(candidate_states_zero) == CANDIDATES_HARD_LIMIT and len(candidate_states_one) == CANDIDATES_HARD_LIMIT: - raise RuntimeError(f"Exceeded the maximum amount of attractor candidates (CANDIDATES_HARD_LIMIT={CANDIDATES_HARD_LIMIT}).") + if ( + len(candidate_states_zero) == CANDIDATES_HARD_LIMIT + and len(candidate_states_one) == CANDIDATES_HARD_LIMIT + ): + raise RuntimeError( + f"Exceeded the maximum amount of attractor candidates (CANDIDATES_HARD_LIMIT={CANDIDATES_HARD_LIMIT})." 
+ ) if len(candidate_states_one) <= len(candidate_states): if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] Chosen {var}=1 without increasing candidate count ({len(candidate_states_one)}). {len(retained_set)}/{len(node_nfvs)} variables chosen.") + print( + f"[{node_id}] Chosen {var}=1 without increasing candidate count ({len(candidate_states_one)}). {len(retained_set)}/{len(node_nfvs)} variables chosen." + ) candidate_states = candidate_states_one continue if len(candidate_states_zero) < len(candidate_states_one): if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] Chosen {var}=0 with better candidate count ({len(candidate_states_zero)}). {len(retained_set)}/{len(node_nfvs)} variables chosen.") + print( + f"[{node_id}] Chosen {var}=0 with better candidate count ({len(candidate_states_zero)}). {len(retained_set)}/{len(node_nfvs)} variables chosen." + ) candidate_states = candidate_states_zero retained_set[var] = 0 else: if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] Chosen {var}=1 with better candidate count ({len(candidate_states_one)}). {len(retained_set)}/{len(node_nfvs)} variables chosen.") + print( + f"[{node_id}] Chosen {var}=1 with better candidate count ({len(candidate_states_one)}). {len(retained_set)}/{len(node_nfvs)} variables chosen." + ) candidate_states = candidate_states_one retained_set[var] = 1 @@ -297,11 +332,15 @@ def compute_attractor_candidates( return [] if node_is_pseudo_minimal and len(candidate_states) == 1: if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Single candidate found in (pseudo) minimal trap space. Done.") + print( + f"[{node_id}] > Single candidate found in (pseudo) minimal trap space. Done." + ) return [candidate_states[0] | node_space] if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Attractor candidates from retained set: {len(candidate_states)}.") + print( + f"[{node_id}] > Attractor candidates from retained set: {len(candidate_states)}." 
+ ) if simulation_minification: if biobalm.succession_diagram.DEBUG: @@ -316,7 +355,9 @@ def compute_attractor_candidates( iterations = 1024 while len(candidate_states) > 0: if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Start simulation with {len(candidate_states)} states and simulation limit {iterations}.") + print( + f"[{node_id}] > Start simulation with {len(candidate_states)} states and simulation limit {iterations}." + ) reduced = run_simulation_minification( node_id, graph_reduced, @@ -346,7 +387,9 @@ def compute_attractor_candidates( return [] if node_is_pseudo_minimal and len(candidate_states) == 1: if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Single candidate found in (pseudo) minimal trap space. Done.") + print( + f"[{node_id}] > Single candidate found in (pseudo) minimal trap space. Done." + ) return [candidate_states[0] | node_space] if pint_minification and not PINT_AVAILABLE: @@ -355,12 +398,16 @@ def compute_attractor_candidates( if biobalm.succession_diagram.DEBUG: print(f"[{node_id}] Start `pint` minification...") - children_bdd = state_list_to_bdd(graph_reduced.symbolic_context(), child_motifs_reduced) - candidates_bdd = state_list_to_bdd(graph_reduced.symbolic_context(), candidate_states) + children_bdd = state_list_to_bdd( + graph_reduced.symbolic_context(), child_motifs_reduced + ) + candidates_bdd = state_list_to_bdd( + graph_reduced.symbolic_context(), candidate_states + ) avoid_bdd = children_bdd.l_or(candidates_bdd) filtered_states = [] - for (i, state) in enumerate(candidate_states): + for i, state in enumerate(candidate_states): state_bdd = state_to_bdd(graph_reduced.symbolic_context(), state) avoid_bdd = avoid_bdd.l_and_not(state_bdd) @@ -377,7 +424,9 @@ def compute_attractor_candidates( filtered_states.append(state) if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > `pint` {i+1}/{len(candidate_states)}: eliminated: {not keep}, retained: {len(filtered_states)}.") + print( + f"[{node_id}] > `pint` 
{i+1}/{len(candidate_states)}: eliminated: {not keep}, retained: {len(filtered_states)}." + ) candidate_states = filtered_states @@ -440,7 +489,10 @@ def run_simulation_minification( s_var = symbolic_ctx.find_network_bdd_variable(var) assert s_var is not None symbolic_vars.append(s_var) - update_functions = {symbolic_vars[i]: graph.mk_update_function(network_vars[i]) for i in range(len(network_vars))} + update_functions = { + symbolic_vars[i]: graph.mk_update_function(network_vars[i]) + for i in range(len(network_vars)) + } # Use stochastic simulation to prune the set of candidate states. # We use different simulation approach depending on whether this space @@ -450,10 +502,12 @@ def run_simulation_minification( candidates_bdd = state_list_to_bdd(symbolic_ctx, candidate_states) filtered_candidates: list[BooleanSpace] = [] - for (i, state) in enumerate(candidate_states): + for i, state in enumerate(candidate_states): if i % 100 == 99 and biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Simulation progress: {i+1}/{len(candidate_states)}") + print( + f"[{node_id}] > Simulation progress: {i+1}/{len(candidate_states)}" + ) # Remove the state from the candidates. If we can prove that is # is not an attractor, we will put it back. 
@@ -518,9 +572,15 @@ def run_simulation_minification( for i in range(max_iterations): progress = int((i * len(candidate_states)) / max_iterations) - if progress % 100 == 99 and biobalm.succession_diagram.DEBUG and (progress not in printed): + if ( + progress % 100 == 99 + and biobalm.succession_diagram.DEBUG + and (progress not in printed) + ): printed.add(progress) - print(f"[{node_id}] > Simulation progress: {progress + 1}/{len(candidate_states)}") + print( + f"[{node_id}] > Simulation progress: {progress + 1}/{len(candidate_states)}" + ) generator.shuffle(symbolic_vars) new_candidates_bdd = symbolic_ctx.mk_constant(False) @@ -611,21 +671,21 @@ def asp_greedy_retained_set_optimization( retained_set_2, avoid_subspaces=avoid_dnf, # We don't need all solutions if the result isn't smaller. - solution_limit=len(candidate_states) + solution_limit=len(candidate_states), ) if len(candidate_states_2) < len(candidate_states): retained_set = retained_set_2 candidate_states = candidate_states_2 done = False if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Candidate states optimized to {len(candidate_states)}.") + print( + f"[{node_id}] > Candidate states optimized to {len(candidate_states)}." + ) return (retained_set, candidate_states) def make_heuristic_retained_set( - graph: AsynchronousGraph, - nfvs: list[str], - avoid_dnf: list[BooleanSpace] + graph: AsynchronousGraph, nfvs: list[str], avoid_dnf: list[BooleanSpace] ) -> BooleanSpace: """ Calculate the retained set for a given `space` based on heuristic criteria. 
diff --git a/biobalm/_sd_attractors/attractor_symbolic.py b/biobalm/_sd_attractors/attractor_symbolic.py index cf38f4f..e713830 100644 --- a/biobalm/_sd_attractors/attractor_symbolic.py +++ b/biobalm/_sd_attractors/attractor_symbolic.py @@ -16,7 +16,7 @@ def compute_attractors_symbolic( sd: SuccessionDiagram, node_id: int, candidate_states: list[BooleanSpace], - seeds_only: bool = False + seeds_only: bool = False, ) -> tuple[list[BooleanSpace], list[VertexSet] | None]: """ Uses exhaustive symbolic reachability to eliminate spurious candidate states @@ -58,7 +58,9 @@ def compute_attractors_symbolic( child_motifs_reduced = [] if node_data["expanded"]: children = sd.node_successors(node_id, compute=False) - child_motifs_reduced = [sd.edge_stable_motif(node_id, s, reduced=True) for s in children] + child_motifs_reduced = [ + sd.edge_stable_motif(node_id, s, reduced=True) for s in children + ] child_motifs_bdd = state_list_to_bdd(symbolic_ctx, child_motifs_reduced) candidate_bdd = state_list_to_bdd(symbolic_ctx, candidate_states_reduced) @@ -69,30 +71,29 @@ def compute_attractors_symbolic( avoid = candidate_set.union(children_set) if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Start symbolic seed state identification with {len(candidate_states)} candidates and {avoid} avoid states.") + print( + f"[{node_id}] > Start symbolic seed state identification with {len(candidate_states)} candidates and {avoid} avoid states." + ) seeds: list[BooleanSpace] = [] sets = [] - for (i, candidate) in enumerate(candidate_states_reduced): - is_last = (i == len(candidate_states_reduced) - 1) + for i, candidate in enumerate(candidate_states_reduced): + is_last = i == len(candidate_states_reduced) - 1 is_minimal = len(child_motifs_reduced) == 0 if seeds_only and is_minimal and is_last and len(seeds) == 0: # This node is pseudo-minimal, so it must contain at least # one attractor seed. 
If we only care about the attractor seeds, # we can thus return the last candidate without checking it. if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Single seed remaining in a (pseduo) minimal space. Done.") + print( + f"[{node_id}] > Single seed remaining in a (pseduo) minimal space. Done." + ) return ([candidate | node_space], None) candidate_singleton = graph_reduced.mk_subspace(candidate) avoid = avoid.minus(candidate_singleton) - closure = symbolic_attractor_test( - node_id, - graph_reduced, - candidate, - avoid - ) + closure = symbolic_attractor_test(node_id, graph_reduced, candidate, avoid) if closure is None: # This candidate can reach someone else in the candidate set, @@ -156,7 +157,7 @@ def symbolic_attractor_test( # Populate conflict vars, assuming we have any. if avoid is not None: avoid_bdd = avoid.to_bdd() - for (name, val) in pivot.items(): + for name, val in pivot.items(): var = graph.find_network_variable(name) bdd_var = graph.symbolic_context().find_network_bdd_variable(name) assert var is not None and bdd_var is not None @@ -167,11 +168,15 @@ def symbolic_attractor_test( # Remaining network variables that are still relevant, but may not # be necessary to reach `avoid`. - other_vars: list[VariableId] = [x for x in graph.network_variables() if x not in conflict_vars] + other_vars: list[VariableId] = [ + x for x in graph.network_variables() if x not in conflict_vars + ] other_vars = sort_variable_list(other_vars) if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Start symbolic reachability with {len(conflict_vars)} conflict variables and {len(other_vars)} other variables.") + print( + f"[{node_id}] > Start symbolic reachability with {len(conflict_vars)} conflict variables and {len(other_vars)} other variables." 
+ ) all_done = False while not all_done: @@ -191,8 +196,13 @@ def symbolic_attractor_test( if not successors.is_empty(): reach_set = reach_set.union(successors) saturation_done = False - if reach_set.symbolic_size() > 100_000 and biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Saturation({len(saturated_vars)}) Expanded reach_set: {reach_set}") + if ( + reach_set.symbolic_size() > 100_000 + and biobalm.succession_diagram.DEBUG + ): + print( + f"[{node_id}] > Saturation({len(saturated_vars)}) Expanded reach_set: {reach_set}" + ) break # Once saturated, try to expand the saturated @@ -218,7 +228,9 @@ def symbolic_attractor_test( saturated_vars = sort_variable_list(saturated_vars) if biobalm.succession_diagram.DEBUG: - print(f"[{node_id}] > Saturation({len(saturated_vars)}) Added saturation variable. {len(conflict_vars)} conflict and {len(other_vars)} other variables remaining.") + print( + f"[{node_id}] > Saturation({len(saturated_vars)}) Added saturation variable. {len(conflict_vars)} conflict and {len(other_vars)} other variables remaining." + ) break if biobalm.succession_diagram.DEBUG: diff --git a/biobalm/succession_diagram.py b/biobalm/succession_diagram.py index f9c84ee..b8f76c5 100644 --- a/biobalm/succession_diagram.py +++ b/biobalm/succession_diagram.py @@ -746,14 +746,18 @@ def node_attractor_candidates( node_id, greedy_asp_minification, simulation_minification, - pint_minification + pint_minification, ) node["attractor_candidates"] = candidates # If the computed candidates are actually valid as seeds, just # propagate the value so that it doesn't need to be computed later. 
- node_is_pseudo_minimal = (not node["expanded"]) or self.node_is_minimal(node_id) - if len(candidates) == 0 or (node_is_pseudo_minimal and len(candidates) == 1): + node_is_pseudo_minimal = (not node["expanded"]) or self.node_is_minimal( + node_id + ) + if len(candidates) == 0 or ( + node_is_pseudo_minimal and len(candidates) == 1 + ): node["attractor_seeds"] = candidates return candidates @@ -797,16 +801,17 @@ def node_attractor_seeds( # Typically, this should be done when computing the candidates, but just in case # something illegal happended... if we can show that the current candidate set # is optimal, we just keep it and don't compute the attractors symbolically. - node_is_pseudo_minimal = (not node["expanded"]) or self.node_is_minimal(node_id) - if len(candidates) == 0 or (node_is_pseudo_minimal and len(candidates) == 1): + node_is_pseudo_minimal = (not node["expanded"]) or self.node_is_minimal( + node_id + ) + if len(candidates) == 0 or ( + node_is_pseudo_minimal and len(candidates) == 1 + ): node["attractor_seeds"] = candidates seeds = candidates else: result = compute_attractors_symbolic( - self, - node_id, - candidate_states=candidates, - seeds_only=True + self, node_id, candidate_states=candidates, seeds_only=True ) node["attractor_seeds"] = result[0] # At this point, attractor_sets could be `None`, but that @@ -894,7 +899,10 @@ def node_percolated_nfvs(self, node_id: int, compute: bool = False) -> list[str] if node["percolated_nfvs"] is None: percolated_network = self.node_percolated_network(node_id, compute) - if percolated_network.variable_count() < SuccessionDiagram.NFVS_NODE_THRESHOLD: + if ( + percolated_network.variable_count() + < SuccessionDiagram.NFVS_NODE_THRESHOLD + ): # Computing the *negative* variant of the FVS is surprisingly costly. # Hence it mostly makes sense for the smaller networks only. 
nfvs = feedback_vertex_set(percolated_network, parity="negative") @@ -906,7 +914,9 @@ def node_percolated_nfvs(self, node_id: int, compute: bool = False) -> list[str] return nfvs - def node_percolated_network(self, node_id: int, compute: bool = False) -> BooleanNetwork: + def node_percolated_network( + self, node_id: int, compute: bool = False + ) -> BooleanNetwork: """ The Boolean network percolated to the node's sub-space, with constant variables removed. @@ -943,9 +953,13 @@ def node_percolated_network(self, node_id: int, compute: bool = False) -> Boolea raise KeyError(f"Percolated network not computed for node {node_id}.") if network is None: - network = percolate_network(self.network, node_space, self.symbolic, remove_constants=True) + network = percolate_network( + self.network, node_space, self.symbolic, remove_constants=True + ) if DEBUG: - print(f"[{node_id}] Computed percolated network with {network.variable_count()} variables (vs {self.network.variable_count()}).") + print( + f"[{node_id}] Computed percolated network with {network.variable_count()} variables (vs {self.network.variable_count()})." + ) node["percolated_network"] = network return network @@ -1328,7 +1342,9 @@ def _expand_one_node(self, node_id: int): ) if len(sub_spaces) == SuccessionDiagram.MAX_MOTIFS_PER_NODE: - raise RuntimeError(f"Exceeded the maximum amount of stable motifs per node (SuccessionDiagram.MAX_MOTIFS_PER_NODE={SuccessionDiagram.MAX_MOTIFS_PER_NODE}).") + raise RuntimeError( + f"Exceeded the maximum amount of stable motifs per node (SuccessionDiagram.MAX_MOTIFS_PER_NODE={SuccessionDiagram.MAX_MOTIFS_PER_NODE})." + ) # Sort the spaces based on a unique key in case trappist is not always # sorted deterministically. 
diff --git a/biobalm/symbolic_utils.py b/biobalm/symbolic_utils.py index f6cb78b..0ae1852 100644 --- a/biobalm/symbolic_utils.py +++ b/biobalm/symbolic_utils.py @@ -146,7 +146,7 @@ def valuation_to_state(ctx: SymbolicContext, valuation: BddValuation) -> Boolean """ result: BooleanSpace = {} - for (var, val) in valuation.items(): + for var, val in valuation.items(): n_var = ctx.find_network_variable(var) if n_var is not None: result[ctx.get_network_variable_name(n_var)] = cast(Literal[0, 1], int(val)) From 523869294f60f5a7c4412230fdc1ba62dfc243ff Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 11 Apr 2024 09:42:49 +0200 Subject: [PATCH 06/18] Print changes recommended by black. --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cb298aa..27ef813 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,7 +24,7 @@ jobs: - name: Lint with Black uses: rickstaa/action-black@v1 with: - black_args: ". --check" + black_args: ". --check --diff" - name: Check types run: | mypy biobalm From b960619aef3bb75b580844d78667cc21001133e7 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 11 Apr 2024 09:44:42 +0200 Subject: [PATCH 07/18] Formatting. --- biobalm/_sd_attractors/attractor_candidates.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/biobalm/_sd_attractors/attractor_candidates.py b/biobalm/_sd_attractors/attractor_candidates.py index fdc52c5..a62fdba 100644 --- a/biobalm/_sd_attractors/attractor_candidates.py +++ b/biobalm/_sd_attractors/attractor_candidates.py @@ -498,12 +498,10 @@ def run_simulation_minification( # We use different simulation approach depending on whether this space # is a minimal trap or not. 
if not avoid_bdd.is_false(): - candidates_bdd = state_list_to_bdd(symbolic_ctx, candidate_states) filtered_candidates: list[BooleanSpace] = [] for i, state in enumerate(candidate_states): - if i % 100 == 99 and biobalm.succession_diagram.DEBUG: print( f"[{node_id}] > Simulation progress: {i+1}/{len(candidate_states)}" @@ -570,7 +568,6 @@ def run_simulation_minification( candidates_bdd = state_list_to_bdd(symbolic_ctx, candidate_states) printed = set() for i in range(max_iterations): - progress = int((i * len(candidate_states)) / max_iterations) if ( progress % 100 == 99 From bee0c11cf95b9b9e14866e000e09edc1b37ce2eb Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 11 Apr 2024 17:48:29 +0200 Subject: [PATCH 08/18] Release memory sooner to avoid wasting it on temporary values. --- biobalm/succession_diagram.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/biobalm/succession_diagram.py b/biobalm/succession_diagram.py index b8f76c5..4f12d79 100644 --- a/biobalm/succession_diagram.py +++ b/biobalm/succession_diagram.py @@ -820,6 +820,13 @@ def node_attractor_seeds( node["attractor_sets"] = result[1] seeds = result[0] + # Release memory once attractor seeds are known. We might need these + # for attractor set computation later, but only if the seeds are not empty. 
+ if len(seeds) == 0: + node["percolated_network"] = None + node["percolated_nfvs"] = None + node["percolated_petri_net"] = None + return seeds def node_attractor_sets( @@ -858,7 +865,11 @@ def node_attractor_sets( if sets is None: seeds = self.node_attractor_seeds(node_id, compute=True) - result = compute_attractors_symbolic(self, node_id, candidate_states=seeds) + result: tuple[list[BooleanSpace], list[VertexSet] | None] = ([], []) + if len(seeds) > 0: + result = compute_attractors_symbolic( + self, node_id, candidate_states=seeds + ) assert result[1] is not None node["attractor_sets"] = result[1] sets = result[1] @@ -1319,7 +1330,7 @@ def _expand_one_node(self, node_id: int): sub_spaces: list[BooleanSpace] # Only use the percolated PN if it is already known. - pn = self.node_percolated_petri_net(node_id, compute=False) + pn = node["percolated_petri_net"] if pn is not None: # We have a pre-propagated PN for this sub-space, hence we can use # that to compute the trap spaces. @@ -1341,6 +1352,12 @@ def _expand_one_node(self, node_id: int): solution_limit=SuccessionDiagram.MAX_MOTIFS_PER_NODE, ) + # Release the Petri net once the sub_spaces are computed. + # It might be needed later for attractor computation, but it + # uses a lot of memory in large diagrams to keep all the nets + # in memory. + node["percolated_petri_net"] = None + if len(sub_spaces) == SuccessionDiagram.MAX_MOTIFS_PER_NODE: raise RuntimeError( f"Exceeded the maximum amount of stable motifs per node (SuccessionDiagram.MAX_MOTIFS_PER_NODE={SuccessionDiagram.MAX_MOTIFS_PER_NODE})." From 1054b3d74499fe2ea65a65a76c16baf1970ea2e4 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 11 Apr 2024 17:48:54 +0200 Subject: [PATCH 09/18] Minor bug in seed-guided expansion. 
--- biobalm/_sd_algorithms/expand_attractor_seeds.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/biobalm/_sd_algorithms/expand_attractor_seeds.py b/biobalm/_sd_algorithms/expand_attractor_seeds.py index c72a4b7..65c4cc4 100644 --- a/biobalm/_sd_algorithms/expand_attractor_seeds.py +++ b/biobalm/_sd_algorithms/expand_attractor_seeds.py @@ -82,6 +82,10 @@ def expand_attractor_seeds(sd: SuccessionDiagram, size_limit: int | None = None) intersect(successor_space, child) for child in expanded_motifs ] avoid = [x for x in avoid_or_none if x is not None] + avoid_restricted = [] + for x in avoid: + y = {var: val for (var, val) in x.items() if var not in successor_space} + avoid_restricted.append(y) retained_set = make_heuristic_retained_set( successor_graph, successor_nfvs, avoid @@ -90,7 +94,7 @@ def expand_attractor_seeds(sd: SuccessionDiagram, size_limit: int | None = None) successor_seeds = compute_fixed_point_reduced_STG( successor_pn, retained_set, - avoid_subspaces=avoid, + avoid_subspaces=avoid_restricted, solution_limit=1, ) From fea3c20752c70c5ece5b1dbfa18203d7088e3e72 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 11 Apr 2024 20:37:15 +0200 Subject: [PATCH 10/18] Formatting. 
--- biobalm/_sd_attractors/__init__.py | 1 - biobalm/_sd_attractors/attractor_candidates.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/biobalm/_sd_attractors/__init__.py b/biobalm/_sd_attractors/__init__.py index 8b13789..e69de29 100644 --- a/biobalm/_sd_attractors/__init__.py +++ b/biobalm/_sd_attractors/__init__.py @@ -1 +0,0 @@ - diff --git a/biobalm/_sd_attractors/attractor_candidates.py b/biobalm/_sd_attractors/attractor_candidates.py index a62fdba..8838fa7 100644 --- a/biobalm/_sd_attractors/attractor_candidates.py +++ b/biobalm/_sd_attractors/attractor_candidates.py @@ -425,7 +425,7 @@ def compute_attractor_candidates( if biobalm.succession_diagram.DEBUG: print( - f"[{node_id}] > `pint` {i+1}/{len(candidate_states)}: eliminated: {not keep}, retained: {len(filtered_states)}." + f"[{node_id}] > `pint` {i + 1}/{len(candidate_states)}: eliminated: {not keep}, retained: {len(filtered_states)}." ) candidate_states = filtered_states @@ -504,7 +504,7 @@ def run_simulation_minification( for i, state in enumerate(candidate_states): if i % 100 == 99 and biobalm.succession_diagram.DEBUG: print( - f"[{node_id}] > Simulation progress: {i+1}/{len(candidate_states)}" + f"[{node_id}] > Simulation progress: {i + 1}/{len(candidate_states)}" ) # Remove the state from the candidates. If we can prove that is From 76d9879c392aaaa4358b741f1ec77a08bff53cb9 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 18 Apr 2024 10:39:21 +0200 Subject: [PATCH 11/18] Do not require `mole` in CI. 
--- .github/workflows/test.yml | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 27ef813..bdf1b4b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -52,12 +52,6 @@ jobs: run: | wget https://github.com/pauleve/pint/releases/download//2019-05-24/pint_2019-05-24_amd64.deb sudo apt install ./pint_2019-05-24_amd64.deb - - name: Fetch and install Mole - run: | - wget http://www.lsv.fr/~schwoon/tools/mole/mole-140428.tar.gz - tar -xvf mole-140428.tar.gz - (cd ./mole-140428 && make) - (cd ./mole-140428 && pwd >> $GITHUB_PATH) - name: Install repo dependencies run: pip install -r requirements.txt - name: Run doctests @@ -81,12 +75,6 @@ jobs: run: | wget https://github.com/pauleve/pint/releases/download//2019-05-24/pint_2019-05-24_amd64.deb sudo apt install ./pint_2019-05-24_amd64.deb - - name: Fetch and install Mole - run: | - wget http://www.lsv.fr/~schwoon/tools/mole/mole-140428.tar.gz - tar -xvf mole-140428.tar.gz - (cd ./mole-140428 && make) - (cd ./mole-140428 && pwd >> $GITHUB_PATH) - name: Install repo dependencies run: pip install -r requirements.txt - name: Install pytest @@ -117,12 +105,6 @@ jobs: run: | wget https://github.com/pauleve/pint/releases/download//2019-05-24/pint_2019-05-24_amd64.deb sudo apt install ./pint_2019-05-24_amd64.deb - - name: Fetch and install Mole - run: | - wget http://www.lsv.fr/~schwoon/tools/mole/mole-140428.tar.gz - tar -xvf mole-140428.tar.gz - (cd ./mole-140428 && make) - (cd ./mole-140428 && pwd >> $GITHUB_PATH) - name: Install repo dependencies run: pip install -r requirements.txt - name: Install pytest From 5da11a74daa4351787cd0aa7f60a13bc96d4fc7e Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 18 Apr 2024 10:39:31 +0200 Subject: [PATCH 12/18] Make `pint` optional in `requirements.txt` --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 
d1b58b1..2730581 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ biodivine_aeon==1.0.0a6 clingo==5.6.2 networkx==2.8.8 -pypint==1.6.2 \ No newline at end of file +pypint[pint]==1.6.2 \ No newline at end of file From 17fe0b85feb9b65e284e157768285bd02f7e8c47 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 18 Apr 2024 10:40:08 +0200 Subject: [PATCH 13/18] Update version. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c7dc6e0..71aed3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ dependencies = [ 'clingo >=5.6.2', 'networkx >=2.8.8', ] -version = "0.1.1" +version = "0.2.0" # Pint can be used for static analysis, but is not mandatory. [project.optional-dependencies] From 9b6782dcbbcacf59c387ac3a2e34b2021b1409f9 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 18 Apr 2024 10:44:40 +0200 Subject: [PATCH 14/18] Typing in `pint_reachability`. --- biobalm/_pint_reachability.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/biobalm/_pint_reachability.py b/biobalm/_pint_reachability.py index d8e05e1..2bb2aab 100644 --- a/biobalm/_pint_reachability.py +++ b/biobalm/_pint_reachability.py @@ -9,7 +9,7 @@ from __future__ import annotations from functools import reduce -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast, Iterable from networkx import DiGraph # type: ignore from pypint import Goal, InMemoryModel # type:ignore @@ -127,23 +127,23 @@ def _petri_net_as_automata_network(petri_net: DiGraph) -> str: if kind != "transition": continue - predecessors = set(petri_net.predecessors(transition)) # type: ignore - successors = set(petri_net.successors(transition)) # type: ignore + predecessors = set(cast(Iterable[str], petri_net.predecessors(transition))) + successors = set(cast(Iterable[str], petri_net.successors(transition))) # The value under modification is the only # value that is 
different between successors and predecessors. - source_place = next(iter(predecessors - successors)) # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa - target_place = next(iter(successors - predecessors)) # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa + source_place = next(iter(predecessors - successors)) + target_place = next(iter(successors - predecessors)) - (s_var, s_level) = place_to_variable(source_place) # type: ignore[reportUnknownArgumentType] # noqa - (t_var, t_level) = place_to_variable(target_place) # type: ignore[reportUnknownArgumentType] # noqa + (s_var, s_level) = place_to_variable(source_place) + (t_var, t_level) = place_to_variable(target_place) assert s_var == t_var # The remaining places represent the necessary conditions. # Here, we transform them into a text format. - conditions = sorted(predecessors.intersection(successors)) # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa - conditions = [place_to_variable(p) for p in conditions] # type: ignore[reportUnknownVariableType,reportUnknownArgumentType] # noqa - conditions = [f'"{var}"={int(level)}' for var, level in conditions] + condition_places = sorted(predecessors.intersection(successors)) + condition_tuples = [place_to_variable(p) for p in condition_places] + conditions = [f'"{var}"={int(level)}' for var, level in condition_tuples] # A pint rule consists of a variable name, value transition, # and a list of necessary conditions for the transition (if any). From beb614e117d16c0c42e090555c0dc35c1f4b7540 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 18 Apr 2024 10:48:48 +0200 Subject: [PATCH 15/18] Nicer printing. 
--- biobalm/_sd_attractors/attractor_candidates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/biobalm/_sd_attractors/attractor_candidates.py b/biobalm/_sd_attractors/attractor_candidates.py index 8838fa7..c746a2b 100644 --- a/biobalm/_sd_attractors/attractor_candidates.py +++ b/biobalm/_sd_attractors/attractor_candidates.py @@ -212,7 +212,7 @@ def compute_attractor_candidates( ) if len(candidate_states) == CANDIDATES_HARD_LIMIT: raise RuntimeError( - f"Exceeded the maximum amount of attractor candidates (CANDIDATES_HARD_LIMIT={CANDIDATES_HARD_LIMIT})." + f"Exceeded the maximum amount of attractor candidates ({CANDIDATES_HARD_LIMIT=})." ) if biobalm.succession_diagram.DEBUG: print( @@ -285,7 +285,7 @@ def compute_attractor_candidates( and len(candidate_states_one) == CANDIDATES_HARD_LIMIT ): raise RuntimeError( - f"Exceeded the maximum amount of attractor candidates (CANDIDATES_HARD_LIMIT={CANDIDATES_HARD_LIMIT})." + f"Exceeded the maximum amount of attractor candidates ({CANDIDATES_HARD_LIMIT=})." ) if len(candidate_states_one) <= len(candidate_states): From e7bb36c149edde832830e06a068b615e5edefb9e Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 18 Apr 2024 10:52:00 +0200 Subject: [PATCH 16/18] Fix documentation headings. --- biobalm/_sd_attractors/attractor_candidates.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/biobalm/_sd_attractors/attractor_candidates.py b/biobalm/_sd_attractors/attractor_candidates.py index c746a2b..05979ec 100644 --- a/biobalm/_sd_attractors/attractor_candidates.py +++ b/biobalm/_sd_attractors/attractor_candidates.py @@ -65,21 +65,21 @@ def compute_attractor_candidates( Subsequently, the method can further optimize this set of candidates. - ### Greedy ASP minification - + Greedy ASP minification + ----------------------- Depending on which values are chosen for the NFVS nodes, a different set of candidates is obtained. 
This optimization method flips the initial NFVS node values and retains modifications that lead to a smaller candidate set, until a local minimum is achieved. - ### Simulation minification - + Simulation minification + ----------------------- It is often possible to reduce the candidate set using a pseudo-random walk which eventually leads to one of the child spaces, or to one of the existing candidate states. - ### Pint minification - + Pint minification + ----------------- The tool `pint` can perform (incomplete) reachability check for any network state, hence we can also use it to reduce the list of candidate states. This option is only available if pint is actually installed. From 7bc242d51f36b66773234cdda93c99edc6458288 Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 18 Apr 2024 17:09:30 +0200 Subject: [PATCH 17/18] New configuration approach. --- biobalm/_pint_reachability.py | 39 ++---- .../_sd_algorithms/expand_attractor_seeds.py | 15 ++- .../_sd_attractors/attractor_candidates.py | 121 +++++++++--------- biobalm/_sd_attractors/attractor_symbolic.py | 39 +++--- biobalm/petri_net_translation.py | 14 +- biobalm/succession_diagram.py | 95 +++++++------- biobalm/types.py | 75 +++++++++++ 7 files changed, 230 insertions(+), 168 deletions(-) diff --git a/biobalm/_pint_reachability.py b/biobalm/_pint_reachability.py index 2bb2aab..7973211 100644 --- a/biobalm/_pint_reachability.py +++ b/biobalm/_pint_reachability.py @@ -16,32 +16,19 @@ from biobalm.petri_net_translation import ( place_to_variable, - _optimized_recursive_dnf_generator, + optimized_recursive_dnf_generator, ) -from biobalm.types import BooleanSpace -import biobalm +from biobalm.types import BooleanSpace, SuccessionDiagramConfiguration if TYPE_CHECKING: from biodivine_aeon import Bdd -GOAL_SIZE_LIMIT = 8192 -""" -Pint is called using command line and can only accept a limited number of arguments. -This limit is applied when constructing goals to avoid reaching this argument limit. 
- -The limit currently applies to the total number of literals that can be used to -represent a goal. - -The default value was empirically tested as safe on Debian linux, but other operating -systems may need a different limit to stay safe. Nevertheless, this should not be -an issue on smaller/simpler networks. -""" - -def _pint_reachability( +def pint_reachability( petri_net: DiGraph, initial_state: BooleanSpace, target_states: Bdd, + config: SuccessionDiagramConfiguration, ) -> bool: """ Use Pint to check if a given `initial_state` can possibly reach some state @@ -59,15 +46,17 @@ def _pint_reachability( for var, level in initial_state.items(): pint_model.initial_state[var] = level - goal = _pint_build_symbolic_goal(target_states) + goal = _pint_build_symbolic_goal(target_states, config) - def failed(_model, _goal): + def failed(*_): raise RuntimeError("Cannot verify.") return pint_model.reachability(goal=goal, fallback=failed) # type: ignore -def _pint_build_symbolic_goal(states: Bdd) -> Goal: +def _pint_build_symbolic_goal( + states: Bdd, config: SuccessionDiagramConfiguration +) -> Goal: """ A helper method which (very explicitly) converts a set of states represented through a BDD into a Pint `Goal`. @@ -80,8 +69,8 @@ def _pint_build_symbolic_goal(states: Bdd) -> Goal: assert not states.is_false() goals: list[Goal] = [] - limit = GOAL_SIZE_LIMIT - for clause in _optimized_recursive_dnf_generator(states): + limit = config["pint_goal_size_limit"] + for clause in optimized_recursive_dnf_generator(states): named_clause = { states.__ctx__().get_variable_name(var): int(value) for var, value in clause.items() @@ -92,7 +81,7 @@ def _pint_build_symbolic_goal(states: Bdd) -> Goal: # If the goal is too large to be passed as a command line argument, # break here and don't continue. This is not ideal but I'm not sure # how to fix this other than modifying `pint` itself. 
- if biobalm.succession_diagram.DEBUG: + if config["debug"]: print( "WARNING: `pint` goal size limit exceeded. A partial goal is used." ) @@ -127,8 +116,8 @@ def _petri_net_as_automata_network(petri_net: DiGraph) -> str: if kind != "transition": continue - predecessors = set(cast(Iterable[str], petri_net.predecessors(transition))) - successors = set(cast(Iterable[str], petri_net.successors(transition))) + predecessors = set(cast(Iterable[str], petri_net.predecessors(transition))) # type: ignore + successors = set(cast(Iterable[str], petri_net.successors(transition))) # type: ignore # The value under modification is the only # value that is different between successors and predecessors. diff --git a/biobalm/_sd_algorithms/expand_attractor_seeds.py b/biobalm/_sd_algorithms/expand_attractor_seeds.py index 65c4cc4..b0d85dc 100644 --- a/biobalm/_sd_algorithms/expand_attractor_seeds.py +++ b/biobalm/_sd_algorithms/expand_attractor_seeds.py @@ -5,8 +5,7 @@ if TYPE_CHECKING: from biobalm.succession_diagram import SuccessionDiagram -import biobalm -import biobalm.succession_diagram +from biobalm.types import BooleanSpace from biobalm.space_utils import intersect from biobalm.trappist_core import compute_fixed_point_reduced_STG from biobalm._sd_attractors.attractor_candidates import make_heuristic_retained_set @@ -25,7 +24,7 @@ def expand_attractor_seeds(sd: SuccessionDiagram, size_limit: int | None = None) # motif-avoidant attractors. sd.expand_minimal_spaces(size_limit) - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( "Minimal trap space expansion finished. Proceeding to attractor expansion." 
) @@ -82,9 +81,11 @@ def expand_attractor_seeds(sd: SuccessionDiagram, size_limit: int | None = None) intersect(successor_space, child) for child in expanded_motifs ] avoid = [x for x in avoid_or_none if x is not None] - avoid_restricted = [] + avoid_restricted: list[BooleanSpace] = [] for x in avoid: - y = {var: val for (var, val) in x.items() if var not in successor_space} + y: BooleanSpace = { + var: val for (var, val) in x.items() if var not in successor_space + } avoid_restricted.append(y) retained_set = make_heuristic_retained_set( @@ -105,7 +106,7 @@ def expand_attractor_seeds(sd: SuccessionDiagram, size_limit: int | None = None) successors.pop() continue - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node}] Found successor with new attractor candidate seeds. Expand node {successors[-1]}." ) @@ -114,7 +115,7 @@ def expand_attractor_seeds(sd: SuccessionDiagram, size_limit: int | None = None) if len(successors) == 0: # Everything is done for this `node` and we can continue to the next one. 
- if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node}] Finished node attractor expansion.") continue diff --git a/biobalm/_sd_attractors/attractor_candidates.py b/biobalm/_sd_attractors/attractor_candidates.py index 05979ec..a0de3c9 100644 --- a/biobalm/_sd_attractors/attractor_candidates.py +++ b/biobalm/_sd_attractors/attractor_candidates.py @@ -16,29 +16,15 @@ import random import biobalm -from biodivine_aeon import Bdd, AsynchronousGraph +from biodivine_aeon import Bdd, AsynchronousGraph, BddVariable from biobalm.trappist_core import compute_fixed_point_reduced_STG from biobalm.symbolic_utils import state_list_to_bdd, valuation_to_state, state_to_bdd try: - PINT_AVAILABLE = True - import biobalm._pint_reachability as pint + pint_available = True + import biobalm._pint_reachability except ModuleNotFoundError: - PINT_AVAILABLE = False - - -CANDIDATES_HARD_LIMIT = 100_000 -""" -If there are more than this amount of attractor candidates, then -attractor detection will fail with a runtime error. This is mainly to -avoid out-of-memory errors or crashing clingo. -""" - -RETAINED_SET_OPTIMIZATION_THRESHOLD = 100 -""" -If there are more than this amount of attractor candidates, we will -try to optimize the retained set using ASP (if enabled). -""" + pint_available = False def compute_attractor_candidates( @@ -103,7 +89,7 @@ def compute_attractor_candidates( The list of attractor candidate states. 
""" - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] Start computing attractor candidates.") node_data = sd.node_data(node_id) @@ -111,13 +97,13 @@ def compute_attractor_candidates( node_space = node_data["space"] if len(node_space) == sd.network.variable_count(): - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] > Attractor candidates done: node is a fixed-point.") return [node_space] node_nfvs = sd.node_percolated_nfvs(node_id, compute=True) - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: root_nfvs = sd.node_percolated_nfvs(sd.root(), compute=True) print( f"[{node_id}] > Percolated node NFVS contains {len(node_nfvs)} nodes (vs {len(root_nfvs)} in the root)." @@ -140,7 +126,7 @@ def compute_attractor_candidates( # In either case, the space must contain at least one attractor. node_is_pseudo_minimal = len(child_motifs_reduced) == 0 - if biobalm.succession_diagram.DEBUG and node_is_pseudo_minimal: + if sd.config["debug"] and node_is_pseudo_minimal: print( f"[{node_id}] > The node has no children (i.e. it is minimal or unexpanded)." ) @@ -162,7 +148,7 @@ def compute_attractor_candidates( assert not sd.node_is_minimal(node_id) if not node_is_pseudo_minimal: - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Attractor candidates done: empty NFVS in a non-minimal space." ) @@ -197,7 +183,7 @@ def compute_attractor_candidates( if len(retained_set) == sd.network.variable_count() and node_is_pseudo_minimal: # If the retained set describes a fixed point, then only one attractor # is present in this space and it must contain the state described by the retained set. - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Singular attractor found through fixed-point retained set. Done." 
) @@ -208,13 +194,13 @@ def compute_attractor_candidates( pn_reduced, retained_set, avoid_subspaces=child_motifs_reduced, - solution_limit=CANDIDATES_HARD_LIMIT, + solution_limit=sd.config["attractor_candidates_limit"], ) - if len(candidate_states) == CANDIDATES_HARD_LIMIT: + if len(candidate_states) == sd.config["attractor_candidates_limit"]: raise RuntimeError( - f"Exceeded the maximum amount of attractor candidates ({CANDIDATES_HARD_LIMIT=})." + f"Exceeded the maximum amount of attractor candidates ({sd.config['attractor_candidates_limit']}; see `SuccessionDiagramConfiguation.attractor_candidates_limit`)." ) - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] Computed {len(candidate_states)} candidate states without retained set optimization." ) @@ -223,21 +209,22 @@ def compute_attractor_candidates( pn_reduced, retained_set, avoid_subspaces=child_motifs_reduced, - solution_limit=RETAINED_SET_OPTIMIZATION_THRESHOLD, + solution_limit=sd.config["retained_set_optimization_threshold"], ) - if len(candidate_states) < RETAINED_SET_OPTIMIZATION_THRESHOLD: + if len(candidate_states) < sd.config["retained_set_optimization_threshold"]: # The candidate set is small. No need to overthink it. The original # retained set is probably quite good. However, we can still try to # optimize it further if it makes sense. if len(candidate_states) > 1 or ( not node_is_pseudo_minimal and len(candidate_states) > 0 ): - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] Initial retained set generated {len(candidate_states)} candidates. Optimizing..." ) optimized = asp_greedy_retained_set_optimization( + sd, node_id, petri_net=pn_reduced, retained_set=retained_set, @@ -249,9 +236,9 @@ def compute_attractor_candidates( else: # There seem to be many candidates, in which case it might be better # to optimize the retained set dynamically. 
- if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( - f"[{node_id}] Initial retained set generated >{RETAINED_SET_OPTIMIZATION_THRESHOLD} candidates. Regenerate retained set." + f"[{node_id}] Initial retained set generated >{sd.config['retained_set_optimization_threshold']} candidates. Regenerate retained set." ) retained_set = {} candidate_states = [] @@ -261,11 +248,11 @@ def compute_attractor_candidates( pn_reduced, retained_set, avoid_subspaces=child_motifs_reduced, - solution_limit=CANDIDATES_HARD_LIMIT, + solution_limit=sd.config["attractor_candidates_limit"], ) if len(candidate_states_zero) <= len(candidate_states): - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] Chosen {var}=0 without increasing candidate count ({len(candidate_states_zero)}). {len(retained_set)}/{len(node_nfvs)} variables chosen." ) @@ -281,15 +268,17 @@ def compute_attractor_candidates( ) if ( - len(candidate_states_zero) == CANDIDATES_HARD_LIMIT - and len(candidate_states_one) == CANDIDATES_HARD_LIMIT + len(candidate_states_zero) + == sd.config["attractor_candidates_limit"] + and len(candidate_states_one) + == sd.config["attractor_candidates_limit"] ): raise RuntimeError( - f"Exceeded the maximum amount of attractor candidates ({CANDIDATES_HARD_LIMIT=})." + f"Exceeded the maximum amount of attractor candidates ({sd.config['attractor_candidates_limit']}; see `SuccessionDiagramConfiguation.attractor_candidates_limit`)." ) if len(candidate_states_one) <= len(candidate_states): - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] Chosen {var}=1 without increasing candidate count ({len(candidate_states_one)}). {len(retained_set)}/{len(node_nfvs)} variables chosen." 
) @@ -297,14 +286,14 @@ def compute_attractor_candidates( continue if len(candidate_states_zero) < len(candidate_states_one): - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] Chosen {var}=0 with better candidate count ({len(candidate_states_zero)}). {len(retained_set)}/{len(node_nfvs)} variables chosen." ) candidate_states = candidate_states_zero retained_set[var] = 0 else: - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] Chosen {var}=1 with better candidate count ({len(candidate_states_one)}). {len(retained_set)}/{len(node_nfvs)} variables chosen." ) @@ -313,9 +302,10 @@ def compute_attractor_candidates( # At this point, we know the candidate count increased and so we should # try to bring it back down. - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] Optimizing partial retained set...") optimized = asp_greedy_retained_set_optimization( + sd, node_id, petri_net=pn_reduced, retained_set=retained_set, @@ -327,23 +317,23 @@ def compute_attractor_candidates( # Terminate if done. if len(candidate_states) == 0: - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] > Initial candidate set empty. Done.") return [] if node_is_pseudo_minimal and len(candidate_states) == 1: - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Single candidate found in (pseudo) minimal trap space. Done." ) return [candidate_states[0] | node_space] - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Attractor candidates from retained set: {len(candidate_states)}." 
) if simulation_minification: - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] Start simulation minification...") avoid_children_symbolic = state_list_to_bdd( graph_reduced.symbolic_context(), child_motifs_reduced @@ -354,11 +344,12 @@ def compute_attractor_candidates( # cannot reduce any further states, we are done. iterations = 1024 while len(candidate_states) > 0: - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Start simulation with {len(candidate_states)} states and simulation limit {iterations}." ) reduced = run_simulation_minification( + sd, node_id, graph_reduced, candidate_states, @@ -377,25 +368,25 @@ def compute_attractor_candidates( if len(candidate_states) == 1 and avoid_children_symbolic.is_false(): break - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] > Candidates after simulation: {len(candidate_states)}") # Terminate if done. if len(candidate_states) == 0: - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] > Candidate set empty. Done.") return [] if node_is_pseudo_minimal and len(candidate_states) == 1: - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Single candidate found in (pseudo) minimal trap space. Done." ) return [candidate_states[0] | node_space] - if pint_minification and not PINT_AVAILABLE: + if pint_minification and not pint_available: print("WARNING: Using `pint`, but `pint` is not installed. 
Skipping.") - elif pint_minification and PINT_AVAILABLE: - if biobalm.succession_diagram.DEBUG: + elif pint_minification and pint_available: + if sd.config["debug"]: print(f"[{node_id}] Start `pint` minification...") children_bdd = state_list_to_bdd( @@ -406,7 +397,7 @@ def compute_attractor_candidates( ) avoid_bdd = children_bdd.l_or(candidates_bdd) - filtered_states = [] + filtered_states: list[BooleanSpace] = [] for i, state in enumerate(candidate_states): state_bdd = state_to_bdd(graph_reduced.symbolic_context(), state) @@ -414,7 +405,9 @@ def compute_attractor_candidates( keep = True try: - if pint._pint_reachability(pn_reduced, state, avoid_bdd): + if biobalm._pint_reachability.pint_reachability( + pn_reduced, state, avoid_bdd, sd.config + ): keep = False except RuntimeError as e: assert str(e) == "Cannot verify." @@ -423,14 +416,14 @@ def compute_attractor_candidates( avoid_bdd = avoid_bdd.l_or(state_bdd) filtered_states.append(state) - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > `pint` {i + 1}/{len(candidate_states)}: eliminated: {not keep}, retained: {len(filtered_states)}." ) candidate_states = filtered_states - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] > Candidates after `pint`: {len(candidate_states)}") # Finally, we need to augment each candidate state with the @@ -442,6 +435,7 @@ def compute_attractor_candidates( def run_simulation_minification( + sd: SuccessionDiagram, node_id: int, graph: AsynchronousGraph, candidate_states: list[BooleanSpace], @@ -484,7 +478,7 @@ def run_simulation_minification( # associate them with the corresponding network's update functions. 
symbolic_ctx = graph.symbolic_context() network_vars = graph.network_variables() - symbolic_vars = [] + symbolic_vars: list[BddVariable] = [] for var in network_vars: s_var = symbolic_ctx.find_network_bdd_variable(var) assert s_var is not None @@ -502,7 +496,7 @@ def run_simulation_minification( filtered_candidates: list[BooleanSpace] = [] for i, state in enumerate(candidate_states): - if i % 100 == 99 and biobalm.succession_diagram.DEBUG: + if i % 100 == 99 and sd.config["debug"]: print( f"[{node_id}] > Simulation progress: {i + 1}/{len(candidate_states)}" ) @@ -566,12 +560,12 @@ def run_simulation_minification( # and (b) we can stop with one candidate instead of zero. candidates_bdd = state_list_to_bdd(symbolic_ctx, candidate_states) - printed = set() + printed: set[int] = set() for i in range(max_iterations): progress = int((i * len(candidate_states)) / max_iterations) if ( progress % 100 == 99 - and biobalm.succession_diagram.DEBUG + and sd.config["debug"] and (progress not in printed) ): printed.add(progress) @@ -613,6 +607,7 @@ def run_simulation_minification( def asp_greedy_retained_set_optimization( + sd: SuccessionDiagram, node_id: int, petri_net: DiGraph, retained_set: BooleanSpace, @@ -674,7 +669,7 @@ def asp_greedy_retained_set_optimization( retained_set = retained_set_2 candidate_states = candidate_states_2 done = False - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Candidate states optimized to {len(candidate_states)}." ) @@ -723,7 +718,7 @@ def make_heuristic_retained_set( A :class:`BooleanSpace` object describing the retained set. 
""" - retained_set = {} + retained_set: BooleanSpace = {} # First, if there are any child spaces present, we extend the retained set # with the values from the one that has the least amount of fixed variables diff --git a/biobalm/_sd_attractors/attractor_symbolic.py b/biobalm/_sd_attractors/attractor_symbolic.py index e713830..12cd0bb 100644 --- a/biobalm/_sd_attractors/attractor_symbolic.py +++ b/biobalm/_sd_attractors/attractor_symbolic.py @@ -4,10 +4,9 @@ if TYPE_CHECKING: from biobalm.succession_diagram import SuccessionDiagram - from biodivine_aeon import VariableId, VertexSet + from biodivine_aeon import VariableId -import biobalm -from biodivine_aeon import AsynchronousGraph, ColoredVertexSet +from biodivine_aeon import AsynchronousGraph, ColoredVertexSet, VertexSet from biobalm.symbolic_utils import state_list_to_bdd from biobalm.types import BooleanSpace @@ -50,10 +49,12 @@ def compute_attractors_symbolic( # Ignore variables that are already fixed in the nodes space. # These are not used in the reduced network. - candidate_states_reduced = [] + candidate_states_reduced: list[BooleanSpace] = [] for candidate in candidate_states: - candidate = {k: v for (k, v) in candidate.items() if k not in node_space} - candidate_states_reduced.append(candidate) + candidate_reduced: BooleanSpace = { + k: v for (k, v) in candidate.items() if k not in node_space + } + candidate_states_reduced.append(candidate_reduced) child_motifs_reduced = [] if node_data["expanded"]: @@ -70,13 +71,13 @@ def compute_attractors_symbolic( avoid = candidate_set.union(children_set) - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Start symbolic seed state identification with {len(candidate_states)} candidates and {avoid} avoid states." 
) seeds: list[BooleanSpace] = [] - sets = [] + sets: list[ColoredVertexSet] = [] for i, candidate in enumerate(candidate_states_reduced): is_last = i == len(candidate_states_reduced) - 1 is_minimal = len(child_motifs_reduced) == 0 @@ -84,7 +85,7 @@ def compute_attractors_symbolic( # This node is pseudo-minimal, so it must contain at least # one attractor seed. If we only care about the attractor seeds, # we can thus return the last candidate without checking it. - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Single seed remaining in a (pseduo) minimal space. Done." ) @@ -93,7 +94,7 @@ def compute_attractors_symbolic( avoid = avoid.minus(candidate_singleton) - closure = symbolic_attractor_test(node_id, graph_reduced, candidate, avoid) + closure = symbolic_attractor_test(sd, node_id, graph_reduced, candidate, avoid) if closure is None: # This candidate can reach someone else in the candidate set, @@ -107,11 +108,11 @@ def compute_attractors_symbolic( seeds.append(candidate | node_space) sets.append(closure) - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] > Finished identification with {len(seeds)} seed states.") space_symbolic = sd.symbolic.mk_subspace(node_space).vertices() - sets_converted = [] + sets_converted: list[VertexSet] = [] for s in sets: # Extend the attractor set with fixed nodes from the node space. vertices = s.vertices() @@ -123,6 +124,7 @@ def compute_attractors_symbolic( def symbolic_attractor_test( + sd: SuccessionDiagram, node_id: int, graph: AsynchronousGraph, pivot: BooleanSpace, @@ -173,7 +175,7 @@ def symbolic_attractor_test( ] other_vars = sort_variable_list(other_vars) - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Start symbolic reachability with {len(conflict_vars)} conflict variables and {len(other_vars)} other variables." 
) @@ -186,7 +188,7 @@ def symbolic_attractor_test( saturation_done = False while not saturation_done: if avoid is not None and not avoid.intersect(reach_set).is_empty(): - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] > Discovered avoid state. Done.") return None @@ -196,10 +198,7 @@ def symbolic_attractor_test( if not successors.is_empty(): reach_set = reach_set.union(successors) saturation_done = False - if ( - reach_set.symbolic_size() > 100_000 - and biobalm.succession_diagram.DEBUG - ): + if reach_set.symbolic_size() > 100_000 and sd.config["debug"]: print( f"[{node_id}] > Saturation({len(saturated_vars)}) Expanded reach_set: {reach_set}" ) @@ -227,13 +226,13 @@ def symbolic_attractor_test( saturated_vars.append(var) saturated_vars = sort_variable_list(saturated_vars) - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print( f"[{node_id}] > Saturation({len(saturated_vars)}) Added saturation variable. {len(conflict_vars)} conflict and {len(other_vars)} other variables remaining." ) break - if biobalm.succession_diagram.DEBUG: + if sd.config["debug"]: print(f"[{node_id}] > Reachability completed with {reach_set}.") return reach_set diff --git a/biobalm/petri_net_translation.py b/biobalm/petri_net_translation.py index 261daee..0c7cda4 100644 --- a/biobalm/petri_net_translation.py +++ b/biobalm/petri_net_translation.py @@ -359,14 +359,14 @@ def network_to_petrinet( return pn -def _optimized_recursive_dnf_generator( +def optimized_recursive_dnf_generator( bdd: Bdd, ) -> Generator[BddPartialValuation, None, None]: """ Yields a generator of `BddPartialValuation` objects, similar to - `bdd.clause_iterator`, but uses a recursive optimization strategy to return - a smaller result than the default method `Bdd` clause sequence. Note that - this is still not the "optimal" DNF, but is often close enough. 
+ `Bdd.clause_iterator` in AEON, but uses a recursive optimization strategy + to return a smaller result than the default method `Bdd` clause sequence. + Note that this is still not the "optimal" DNF, but is often close enough. This is technically slower for BDDs that already have a small clause count, but can be much better in the long-term when the clause count is high. @@ -398,11 +398,11 @@ def _optimized_recursive_dnf_generator( best_size = t_size + f_size best_var = var - for t_val in _optimized_recursive_dnf_generator(bdd.r_restrict({best_var: True})): + for t_val in optimized_recursive_dnf_generator(bdd.r_restrict({best_var: True})): t_val[best_var] = True yield t_val - for f_val in _optimized_recursive_dnf_generator(bdd.r_restrict({best_var: False})): + for f_val in optimized_recursive_dnf_generator(bdd.r_restrict({best_var: False})): f_val[best_var] = False yield f_val @@ -421,7 +421,7 @@ def _create_transitions( """ dir_str = "up" if go_up else "down" total = 0 - for t_id, implicant in enumerate(_optimized_recursive_dnf_generator(implicant_bdd)): + for t_id, implicant in enumerate(optimized_recursive_dnf_generator(implicant_bdd)): total += 1 t_name = f"tr_{var_name}_{dir_str}_{t_id + 1}" pn.add_node( # type: ignore[reportUnknownMemberType] diff --git a/biobalm/succession_diagram.py b/biobalm/succession_diagram.py index 4f12d79..f370564 100644 --- a/biobalm/succession_diagram.py +++ b/biobalm/succession_diagram.py @@ -32,12 +32,12 @@ ) from biobalm.space_utils import percolate_network, percolate_space, space_unique_key from biobalm.trappist_core import trappist -from biobalm.types import BooleanSpace, NodeData, SuccessionDiagramState - -DEBUG: bool = False -""" -If `True`, print debug and progress messages. 
-""" +from biobalm.types import ( + BooleanSpace, + NodeData, + SuccessionDiagramState, + SuccessionDiagramConfiguration, +) class SuccessionDiagram: @@ -79,26 +79,6 @@ class SuccessionDiagram: """ - NFVS_NODE_THRESHOLD: int = 2_000 - """ - If the number of nodes in the (redunced) network is greater than this - threshold, we find a feedback vertex set (without consideration of cycle - sign) for reachability preprocessing. There is a trade-off between the speed - gains from a smaller node set to consider and the cost of determining which - FVS nodes only intersect negative cycles to find an NFVS subset. Typically, - for smaller networks, the trade-off is in favor of computing a smaller NFVS. - """ - - MAX_MOTIFS_PER_NODE = 100_000 - """ - An artificial limit on the number of stable motifs that can be - considered during the expansion a single node. - - This limit is in place mainly to avoid surprising out of memory errors, - because currently there is no logging mechanism that would report the - number of stable motifs gradually. - """ - __slots__ = ( "network", "symbolic", @@ -106,9 +86,18 @@ class SuccessionDiagram: "nfvs", "dag", "node_indices", + "config", ) - def __init__(self, network: BooleanNetwork): + def __init__( + self, + network: BooleanNetwork, + config: SuccessionDiagramConfiguration | None = None, + ): + if config is None: + config = SuccessionDiagram.default_config() + self.config = config + # Original Boolean network. self.network: BooleanNetwork = cleanup_network(network) """ @@ -125,7 +114,7 @@ def __init__(self, network: BooleanNetwork): The Petri net representation of the network (see :mod:`petri_net_translation`). """ - if DEBUG: + if self.config["debug"]: print( f"Generated global Petri net with {len(self.petri_net.nodes)} nodes and {len(self.petri_net.edges)} edges." 
) @@ -157,6 +146,7 @@ def __getstate__(self) -> SuccessionDiagramState: "nfvs": self.nfvs, "dag": self.dag, "node_indices": self.node_indices, + "config": self.config, } def __setstate__(self, state: SuccessionDiagramState): @@ -167,6 +157,7 @@ def __setstate__(self, state: SuccessionDiagramState): self.nfvs = state["nfvs"] self.dag = state["dag"] self.node_indices = state["node_indices"] + self.config = state["config"] def __len__(self) -> int: """ @@ -174,6 +165,17 @@ def __len__(self) -> int: """ return self.dag.number_of_nodes() + @staticmethod + def default_config() -> SuccessionDiagramConfiguration: + return { + "debug": False, + "max_motifs_per_node": 100_000, + "nfvs_size_threshold": 2_000, + "pint_goal_size_limit": 8_192, + "attractor_candidates_limit": 100_000, + "retained_set_optimization_threshold": 100, + } + @staticmethod def from_rules( rules: str, @@ -614,7 +616,7 @@ def reclaim_node_data(self): """ for node_id in self.node_ids(): - data = self.node_data[node_id] + data = self.node_data(node_id) data["percolated_network"] = None data["percolated_petri_net"] = None data["percolated_nfvs"] = None @@ -910,10 +912,8 @@ def node_percolated_nfvs(self, node_id: int, compute: bool = False) -> list[str] if node["percolated_nfvs"] is None: percolated_network = self.node_percolated_network(node_id, compute) - if ( - percolated_network.variable_count() - < SuccessionDiagram.NFVS_NODE_THRESHOLD - ): + percolated_size = percolated_network.variable_count() + if percolated_size < self.config["nfvs_size_threshold"]: # Computing the *negative* variant of the FVS is surprisingly costly. # Hence it mostly makes sense for the smaller networks only. 
nfvs = feedback_vertex_set(percolated_network, parity="negative") @@ -967,7 +967,7 @@ def node_percolated_network( network = percolate_network( self.network, node_space, self.symbolic, remove_constants=True ) - if DEBUG: + if self.config["debug"]: print( f"[{node_id}] Computed percolated network with {network.variable_count()} variables (vs {self.network.variable_count()})." ) @@ -1032,7 +1032,7 @@ def node_percolated_petri_net( percolated_pn = restrict_petrinet_to_subspace(base_pn, percolate_space) - if DEBUG: + if self.config["debug"]: print( f"[{node_id}] Generated Petri net restriction with {len(percolated_pn.nodes)} nodes and {len(percolated_pn.edges)} edges." ) @@ -1298,8 +1298,6 @@ def _expand_one_node(self, node_id: int): if node["expanded"]: return - node["expanded"] = True - # If the node had any attractor data computed as unexpanded, these are # no longer valid and need to be erased. node["attractor_seeds"] = None @@ -1308,7 +1306,7 @@ def _expand_one_node(self, node_id: int): current_space = node["space"] - if DEBUG: + if self.config["debug"]: print( f"[{node_id}] Expanding: {len(self.node_data(node_id)['space'])} fixed vars." ) @@ -1316,8 +1314,9 @@ def _expand_one_node(self, node_id: int): if len(current_space) == self.network.variable_count(): # This node is a fixed-point. Trappist would just # return this fixed-point again. No need to continue. - if DEBUG: + if self.config["debug"]: print(f"[{node_id}] Found fixed-point: {current_space}.") + node["expanded"] = True return # We use the non-propagated Petri net for backwards-compatibility reasons here. 
@@ -1338,7 +1337,7 @@ def _expand_one_node(self, node_id: int): pn, problem="max", optimize_source_variables=source_nodes, - solution_limit=SuccessionDiagram.MAX_MOTIFS_PER_NODE, + solution_limit=self.config["max_motifs_per_node"], ) sub_spaces = [(s | current_space) for s in partial_sub_spaces] else: @@ -1349,7 +1348,7 @@ def _expand_one_node(self, node_id: int): problem="max", ensure_subspace=current_space, optimize_source_variables=source_nodes, - solution_limit=SuccessionDiagram.MAX_MOTIFS_PER_NODE, + solution_limit=self.config["max_motifs_per_node"], ) # Release the Petri net once the sub_spaces are computed. @@ -1358,9 +1357,9 @@ def _expand_one_node(self, node_id: int): # in memory. node["percolated_petri_net"] = None - if len(sub_spaces) == SuccessionDiagram.MAX_MOTIFS_PER_NODE: + if len(sub_spaces) == self.config["max_motifs_per_node"]: raise RuntimeError( - f"Exceeded the maximum amount of stable motifs per node (SuccessionDiagram.MAX_MOTIFS_PER_NODE={SuccessionDiagram.MAX_MOTIFS_PER_NODE})." + f"Exceeded the maximum amount of stable motifs per node ({self.config['max_motifs_per_node']}; see `SuccessionDiagramConfiguration.max_motifs_per_node`)." ) # Sort the spaces based on a unique key in case trappist is not always @@ -1370,19 +1369,23 @@ def _expand_one_node(self, node_id: int): ) if len(sub_spaces) == 0: - if DEBUG: + if self.config["debug"]: print(f"[{node_id}] Found minimum trap space: {current_space}.") + node["expanded"] = True return - if DEBUG: + if self.config["debug"]: print(f"[{node_id}] Found sub-spaces: {len(sub_spaces)}") for sub_space in sub_spaces: child_id = self._ensure_node(node_id, sub_space) - if DEBUG: + if self.config["debug"]: print(f"[{node_id}] Created edge into node {child_id}.") + # If everything else worked out, we can mark the node as expanded. 
+ node["expanded"] = True + def _ensure_node(self, parent_id: int | None, stable_motif: BooleanSpace) -> int: """ Internal method that ensures the provided node is present in this diff --git a/biobalm/types.py b/biobalm/types.py index 9f5692f..01eb09c 100644 --- a/biobalm/types.py +++ b/biobalm/types.py @@ -45,6 +45,11 @@ class SuccessionDiagramState(TypedDict): diagram (see :func:`biobalm.space_utils.space_unique_key`). """ + config: SuccessionDiagramConfiguration + """ + "Global" configuration of a succession diagram. + """ + class NodeData(TypedDict): """ @@ -153,3 +158,73 @@ class NodeData(TypedDict): If `None`, these have not been computed, and the node may or may not have associated attractors. """ + + +class SuccessionDiagramConfiguration(TypedDict): + """ + Describes the configuration options of a `SuccessionDiagram`. + + Use :meth:`SuccessionDiagram.default_config` to create a + configuration dictionary pre-populated with default values. + """ + + debug: bool + """ + If `True`, the `SuccessionDiagram` will print messages + describing the progress of the running operations. + + [Default: False] + """ + + max_motifs_per_node: int + """ + Limit on the number of stable motifs explored for one node of a succession + diagram. If this limit is exceeded during node expansion, a `RuntimeError` + is raised and the node remains unexpanded. + + This limit is in place mainly to avoid surprising out of memory errors, + because currently there is no logging mechanism that would report the + number of stable motifs gradually. + + [Default: 100_000] + """ + + nfvs_size_threshold: int + """ + For networks larger than this threshold, we only run FVS detection + instead of NFVS detection. This is still correct, but can produce + a larger node set. + + + There is a trade-off between the speed gains from a smaller node set + to consider and the cost of determining which FVS nodes only + intersect negative cycles to find an NFVS subset. 
Typically, + for smaller networks, the trade-off is in favor of + computing a smaller NFVS. + """ + + pint_goal_size_limit: int + """ + Pint is called using command line and can only accept a limited number of arguments. + This limit is applied when constructing goals to avoid reaching this argument limit. + + The limit currently applies to the total number of literals that can be used to + represent a goal. + + The default value was empirically tested as safe on Debian linux, but other operating + systems may need a different limit to stay safe. Nevertheless, this should not be + an issue on smaller/simpler networks. + """ + + attractor_candidates_limit: int + """ + If more than `attractor_candidates_limit` states are produced during the + attractor detection process, then the process fails with a `RuntimeError`. + This is mainly to avoid out-of-memory errors or crashing `clingo`. + """ + + retained_set_optimization_threshold: int + """ + If there are more than this amount of attractor candidates, the attractor + detection process will try to optimize the retained set using ASP (if enabled). + """ From 9f6de065fb94aabf555e56d9a8d1065a32b8e3bd Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 18 Apr 2024 17:13:26 +0200 Subject: [PATCH 18/18] Add config to other constructors. --- biobalm/succession_diagram.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/biobalm/succession_diagram.py b/biobalm/succession_diagram.py index f370564..974553c 100644 --- a/biobalm/succession_diagram.py +++ b/biobalm/succession_diagram.py @@ -180,6 +180,7 @@ def default_config() -> SuccessionDiagramConfiguration: def from_rules( rules: str, format: Literal["bnet", "aeon", "sbml"] = "bnet", + config: SuccessionDiagramConfiguration | None = None, ) -> SuccessionDiagram: """ Generate a succession diagram from the given string. @@ -191,6 +192,9 @@ def from_rules( format : Literal['bnet', 'aeon', 'sbml'] The format of the string. 
One of `"bnet"`, `"aeon"`, or `"sbml"`. Defaults to `"bnet"`. + config : SuccessionDiagramConfiguration | None + An optional configuration object with internal settings + and default values. Returns ------- @@ -199,21 +203,26 @@ def from_rules( """ if format == "bnet": - return SuccessionDiagram(BooleanNetwork.from_bnet(rules)) + return SuccessionDiagram(BooleanNetwork.from_bnet(rules), config) elif format == "aeon": - return SuccessionDiagram(BooleanNetwork.from_aeon(rules)) + return SuccessionDiagram(BooleanNetwork.from_aeon(rules), config) elif format == "sbml": - return SuccessionDiagram(BooleanNetwork.from_sbml(rules)) + return SuccessionDiagram(BooleanNetwork.from_sbml(rules), config) else: raise ValueError(f"Unknown format: {format}") @staticmethod - def from_file(path: str) -> SuccessionDiagram: + def from_file( + path: str, config: SuccessionDiagramConfiguration | None = None + ) -> SuccessionDiagram: """ Read a `BooleanNetwork` from the given file path. The format is automatically inferred from the file extension. + + Optionally, you can also supply a configuration object to customize the + resulting succession diagram. """ - return SuccessionDiagram(BooleanNetwork.from_file(path)) + return SuccessionDiagram(BooleanNetwork.from_file(path), config) def expanded_attractor_candidates(self) -> dict[int, list[BooleanSpace]]: """