From 7fe6672def00690537b27c90ad94eee4ce62d1ff Mon Sep 17 00:00:00 2001 From: Mats Rydberg Date: Tue, 23 Sep 2025 12:27:43 +0200 Subject: [PATCH 1/4] Add json file to help standardise parameter descriptions --- .../api/docstrings/parameters.json | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 graphdatascience/procedure_surface/api/docstrings/parameters.json diff --git a/graphdatascience/procedure_surface/api/docstrings/parameters.json b/graphdatascience/procedure_surface/api/docstrings/parameters.json new file mode 100644 index 000000000..7ca23faea --- /dev/null +++ b/graphdatascience/procedure_surface/api/docstrings/parameters.json @@ -0,0 +1,29 @@ +{ + "G": "The graph to run the algorithm on.", + "sudo": "Disable the memory guard.", + "log_progress": "Display progress logging.", + "username": "As an administrator, run the algorithm as a different user, to access also their graphs.", + "job_id": "Identifier for the computation.", + + "tolerance": "Minimum change in scores between iterations.", + "concurrency": "Number of CPU threads to use.", + "max_iterations": "Maximum number of iterations to run.", + "scaler": "Name of the scaler applied on the resulting scores.", + "source_nodes": "List of node ids to use as starting points. Use a list of list pairs to associate each node with a bias > 0.", + "orientation": "The orientation of relationships to consider. 
Can be 'NATURAL', 'REVERSE', or 'UNDIRECTED'.", + + "sampling_size": "Number of source nodes to consider for computing centrality scores.", + "sampling_seed": "Seed value for the random number generator that selects source nodes.", + "damping_factor": "Probability of a jump to a random node.", + "use_wasserman_faust": "Use the improved Wasserman-Faust formula for closeness computation.", + "seed_set_size": "Number of nodes that maximize the expected spread in the network.", + "propagation_probability": "Probability of a node being activated by an active neighbour node.", + "monte_carlo_simulations": "Number of Monte-Carlo simulations.", + + "mutate_property": "Name of the node property to store the results in.", + "write_property": "Name of the node property to store the results in.", + + "node_labels": "Filter the graph using the given node labels. Nodes with any of the given labels will be included.", + "relationship_types": "Filter the graph using the given relationship types. Relationships with any of the given types will be included.", + "relationship_weight_property": "Name of the property to be used as weights." +} \ No newline at end of file From f55b82b89d47a0bc90e7ea68cc61476040c79690 Mon Sep 17 00:00:00 2001 From: Mats Rydberg Date: Tue, 23 Sep 2025 12:28:13 +0200 Subject: [PATCH 2/4] Describe sudo the same way --- .../procedure_surface/api/catalog/node_label_endpoints.py | 4 ++-- .../api/catalog/node_properties_endpoints.py | 6 +++--- .../api/catalog/relationships_endpoints.py | 8 ++++---- .../procedure_surface/api/catalog_endpoints.py | 2 +- .../api/centrality/articlerank_endpoints.py | 2 +- .../api/centrality/articulationpoints_endpoints.py | 8 ++++---- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/graphdatascience/procedure_surface/api/catalog/node_label_endpoints.py b/graphdatascience/procedure_surface/api/catalog/node_label_endpoints.py index fbe1cf8db..72d013956 100644 --- 
a/graphdatascience/procedure_surface/api/catalog/node_label_endpoints.py +++ b/graphdatascience/procedure_surface/api/catalog/node_label_endpoints.py @@ -34,7 +34,7 @@ def mutate( node_filter : str A Cypher predicate for filtering nodes in the input graph. sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None @@ -78,7 +78,7 @@ def write( node_filter : str A Cypher predicate for filtering nodes in the input graph. sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None diff --git a/graphdatascience/procedure_surface/api/catalog/node_properties_endpoints.py b/graphdatascience/procedure_surface/api/catalog/node_properties_endpoints.py index 27cb97b5c..1846a547e 100644 --- a/graphdatascience/procedure_surface/api/catalog/node_properties_endpoints.py +++ b/graphdatascience/procedure_surface/api/catalog/node_properties_endpoints.py @@ -42,7 +42,7 @@ def stream( concurrency : Optional[Any], default=None The number of concurrent threads sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None @@ -89,7 +89,7 @@ def write( write_concurrency : Optional[Any], default=None The number of concurrent threads used for writing sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None @@ -130,7 +130,7 @@ def drop( concurrency : Optional[Any], default=None The number of concurrent threads sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. 
log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None diff --git a/graphdatascience/procedure_surface/api/catalog/relationships_endpoints.py b/graphdatascience/procedure_surface/api/catalog/relationships_endpoints.py index 0929bde06..97e1b2f47 100644 --- a/graphdatascience/procedure_surface/api/catalog/relationships_endpoints.py +++ b/graphdatascience/procedure_surface/api/catalog/relationships_endpoints.py @@ -39,7 +39,7 @@ def stream( concurrency : Optional[Any], default=None The number of concurrent threads sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None @@ -81,7 +81,7 @@ def write( write_concurrency : Optional[Any], default=None The number of concurrent threads used for writing sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None @@ -146,7 +146,7 @@ def index_inverse( concurrency : Optional[Any], default=None The number of concurrent threads sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None @@ -192,7 +192,7 @@ def to_undirected( concurrency : Optional[Any], default=None The number of concurrent threads sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. 
log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None diff --git a/graphdatascience/procedure_surface/api/catalog_endpoints.py b/graphdatascience/procedure_surface/api/catalog_endpoints.py index 3e35f97df..ca7618eb1 100644 --- a/graphdatascience/procedure_surface/api/catalog_endpoints.py +++ b/graphdatascience/procedure_surface/api/catalog_endpoints.py @@ -118,7 +118,7 @@ def generate( job_id : Optional[str], default=None Unique identifier for the job associated with the graph generation. sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress during graph generation. username : Optional[str], default=None diff --git a/graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py b/graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py index c69c70464..22805eef5 100644 --- a/graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py +++ b/graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py @@ -176,7 +176,7 @@ def stream( node_labels : Optional[List[str]], default=None The node labels used to select nodes for this algorithm run sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. 
log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None diff --git a/graphdatascience/procedure_surface/api/centrality/articulationpoints_endpoints.py b/graphdatascience/procedure_surface/api/centrality/articulationpoints_endpoints.py index b2e387af4..b2bfb9f22 100644 --- a/graphdatascience/procedure_surface/api/centrality/articulationpoints_endpoints.py +++ b/graphdatascience/procedure_surface/api/centrality/articulationpoints_endpoints.py @@ -38,7 +38,7 @@ def mutate( node_labels : Optional[List[str]], default=None The node labels used to select nodes for this algorithm run sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None @@ -80,7 +80,7 @@ def stats( node_labels : Optional[List[str]], default=None The node labels used to select nodes for this algorithm run sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None @@ -120,7 +120,7 @@ def stream( node_labels : Optional[List[str]], default=None The node labels used to select nodes for this algorithm run sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None @@ -168,7 +168,7 @@ def write( node_labels : Optional[List[str]], default=None The node labels used to select nodes for this algorithm run sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. 
log_progress : Optional[bool], default=None Whether to log progress username : Optional[str], default=None From a131610b6d6d9121c61ce208934d336ca9f33658 Mon Sep 17 00:00:00 2001 From: Mats Rydberg Date: Mon, 29 Sep 2025 13:36:46 +0200 Subject: [PATCH 3/4] Whitespace --- .../procedure_surface/api/centrality/betweenness_endpoints.py | 2 ++ .../procedure_surface/api/centrality/pagerank_endpoints.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/graphdatascience/procedure_surface/api/centrality/betweenness_endpoints.py b/graphdatascience/procedure_surface/api/centrality/betweenness_endpoints.py index 7b0aa0bdb..8c5f8e1b1 100644 --- a/graphdatascience/procedure_surface/api/centrality/betweenness_endpoints.py +++ b/graphdatascience/procedure_surface/api/centrality/betweenness_endpoints.py @@ -7,10 +7,12 @@ from graphdatascience.procedure_surface.api.base_result import BaseResult from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2 + from graphdatascience.procedure_surface.api.estimation_result import EstimationResult class BetweennessEndpoints(ABC): + @abstractmethod def mutate( self, diff --git a/graphdatascience/procedure_surface/api/centrality/pagerank_endpoints.py b/graphdatascience/procedure_surface/api/centrality/pagerank_endpoints.py index f0dcb05d2..3ce353ce8 100644 --- a/graphdatascience/procedure_surface/api/centrality/pagerank_endpoints.py +++ b/graphdatascience/procedure_surface/api/centrality/pagerank_endpoints.py @@ -7,10 +7,12 @@ from graphdatascience.procedure_surface.api.base_result import BaseResult from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2 + from graphdatascience.procedure_surface.api.estimation_result import EstimationResult class PageRankEndpoints(ABC): + @abstractmethod def mutate( self, From b7bab91a236603c6f6b96e7dd5e8be66056e08b4 Mon Sep 17 00:00:00 2001 From: Mats Rydberg Date: Mon, 29 Sep 2025 13:36:58 +0200 Subject: [PATCH 4/4] Community mutate --- 
.../api/community/k1coloring_endpoints.py | 26 +++++++------- .../api/community/kcore_endpoints.py | 24 +++++++------ .../api/community/louvain_endpoints.py | 34 +++++++++++-------- .../api/community/scc_endpoints.py | 23 +++++++------ .../api/community/wcc_endpoints.py | 27 ++++++++------- 5 files changed, 74 insertions(+), 60 deletions(-) diff --git a/graphdatascience/procedure_surface/api/community/k1coloring_endpoints.py b/graphdatascience/procedure_surface/api/community/k1coloring_endpoints.py index f1a5c5ac2..26e53b75a 100644 --- a/graphdatascience/procedure_surface/api/community/k1coloring_endpoints.py +++ b/graphdatascience/procedure_surface/api/community/k1coloring_endpoints.py @@ -11,9 +11,6 @@ class K1ColoringEndpoints(ABC): - """ - Abstract base class defining the API for the K-1 Coloring algorithm. - """ @abstractmethod def mutate( @@ -31,32 +28,35 @@ def mutate( job_id: Optional[Any] = None, ) -> K1ColoringMutateResult: """ - Executes the K-1 Coloring algorithm and writes the results to the in-memory graph as node properties. + Runs the K-1 Coloring algorithm and stores the results in the graph catalog as a new node property. + + The K-1 Coloring algorithm assigns a color to every node in the graph, trying to optimize for two objectives: + to make sure that every neighbor of a given node has a different color than the node itself, and to use as few colors as possible. Parameters ---------- G : GraphV2 The graph to run the algorithm on mutate_property : str - The property name to store the color for each node + Name of the node property to store the results in. batch_size : Optional[int], default=None The batch size for processing max_iterations : Optional[int], default=None - The maximum number of iterations of K-1 Coloring to run + Maximum number of iterations to run. 
relationship_types : Optional[List[str]], default=None - The relationships types used to select relationships for this algorithm run + Filter the graph using the given relationship types. Relationships with any of the given types will be included. node_labels : Optional[List[str]], default=None - The node labels used to select nodes for this algorithm run + Filter the graph using the given node labels. Nodes with any of the given labels will be included. sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None - Whether to log progress + Display progress logging. username : Optional[str], default=None - The username to attribute the procedure run to + As an administrator, run the algorithm as a different user, to access also their graphs. concurrency : Optional[Any], default=None - The number of concurrent threads + Number of CPU threads to use. job_id : Optional[Any], default=None - An identifier for the job + Identifier for the computation. Returns ------- diff --git a/graphdatascience/procedure_surface/api/community/kcore_endpoints.py b/graphdatascience/procedure_surface/api/community/kcore_endpoints.py index c131f4bc3..d5339dc68 100644 --- a/graphdatascience/procedure_surface/api/community/kcore_endpoints.py +++ b/graphdatascience/procedure_surface/api/community/kcore_endpoints.py @@ -29,28 +29,32 @@ def mutate( job_id: Optional[Any] = None, ) -> KCoreMutateResult: """ - Executes the K-Core algorithm and writes the results to the in-memory graph as node properties. + Runs the K-Core Decomposition algorithm and stores the results in the graph catalog as a new node property. + + The K-core decomposition constitutes a process that separates the nodes in a graph into groups based on the degree sequence and topology of the graph. + The term `i-core` refers to a maximal subgraph of the original graph such that each node in this subgraph has degree at least `i`. 
+ Each node is associated with a core value which denotes the largest value `i` such that the node belongs to the `i-core`. Parameters ---------- G : GraphV2 The graph to run the algorithm on mutate_property : str - The property name to store the core value for each node + Name of the node property to store the results in. relationship_types : Optional[List[str]], default=None - The relationships types used to select relationships for this algorithm run + Filter the graph using the given relationship types. Relationships with any of the given types will be included. node_labels : Optional[List[str]], default=None - The node labels used to select nodes for this algorithm run + Filter the graph using the given node labels. Nodes with any of the given labels will be included. sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None - Whether to log progress + Display progress logging. username : Optional[str], default=None - The username to attribute the procedure run to + As an administrator, run the algorithm as a different user, to access also their graphs. concurrency : Optional[Any], default=None - The number of concurrent threads - job_id : Optional[Any], default=None - An identifier for the job + Number of CPU threads to use. + job_id : Optional[Any], default=None + Identifier for the computation. Returns ------- diff --git a/graphdatascience/procedure_surface/api/community/louvain_endpoints.py b/graphdatascience/procedure_surface/api/community/louvain_endpoints.py index 60a158518..005a0dd92 100644 --- a/graphdatascience/procedure_surface/api/community/louvain_endpoints.py +++ b/graphdatascience/procedure_surface/api/community/louvain_endpoints.py @@ -36,42 +36,46 @@ def mutate( relationship_weight_property: Optional[str] = None, ) -> LouvainMutateResult: """ - Executes the Louvain algorithm and writes the results to the in-memory graph as node properties. 
+ Runs the Louvain algorithm and stores the results in the graph catalog as a new node property. + + The Louvain method is an algorithm to detect communities in large networks. + It maximizes a modularity score for each community, where the modularity quantifies the quality of an assignment of nodes to communities by evaluating how much more densely connected the nodes within a community are, compared to how connected they would be in a random network. + The Louvain algorithm is a hierarchical clustering algorithm that recursively merges communities into a single node and runs the modularity clustering on the condensed graphs. Parameters ---------- G : GraphV2 The graph to run the algorithm on mutate_property : str - The property name to store the community ID for each node + Name of the node property to store the results in. tolerance : Optional[float], default=None - The tolerance value for the algorithm convergence + Minimum change in scores between iterations. max_levels : Optional[int], default=None The maximum number of levels in the hierarchy include_intermediate_communities : Optional[bool], default=None - Whether to include intermediate community assignments + Whether to include intermediate communities max_iterations : Optional[int], default=None - The maximum number of iterations per level + Maximum number of iterations to run. relationship_types : Optional[List[str]], default=None - The relationships types used to select relationships for this algorithm run + Filter the graph using the given relationship types. Relationships with any of the given types will be included. node_labels : Optional[List[str]], default=None - The node labels used to select nodes for this algorithm run + Filter the graph using the given node labels. Nodes with any of the given labels will be included. sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. 
log_progress : Optional[bool], default=None - Whether to log progress + Display progress logging. username : Optional[str], default=None - The username to attribute the procedure run to + As an administrator, run the algorithm as a different user, to access also their graphs. concurrency : Optional[Any], default=None - The number of concurrent threads + Number of CPU threads to use. job_id : Optional[Any], default=None - An identifier for the job + Identifier for the computation. seed_property : Optional[str], default=None - Defines node properties that are used as initial community identifiers + The property name that contains seed values consecutive_ids : Optional[bool], default=None - Flag to decide whether community identifiers are mapped into a consecutive id space + Whether to use consecutive IDs relationship_weight_property : Optional[str], default=None - The property name that contains weight + Name of the property to be used as weights. Returns ------- diff --git a/graphdatascience/procedure_surface/api/community/scc_endpoints.py b/graphdatascience/procedure_surface/api/community/scc_endpoints.py index 535dde4a8..47a4f4ff3 100644 --- a/graphdatascience/procedure_surface/api/community/scc_endpoints.py +++ b/graphdatascience/procedure_surface/api/community/scc_endpoints.py @@ -30,30 +30,33 @@ def mutate( consecutive_ids: Optional[bool] = None, ) -> SccMutateResult: """ - Executes the SCC algorithm and writes the results to the in-memory graph as node properties. + Runs the Strongly Connected Components algorithm and stores the results in the graph catalog as a new node property. + + The Strongly Connected Components (SCC) algorithm finds maximal sets of connected nodes in a directed graph. + A set is considered a strongly connected component if there is a directed path between each pair of nodes within the set. 
Parameters ---------- G : GraphV2 The graph to run the algorithm on mutate_property : str - The property name to store the component ID for each node + Name of the node property to store the results in. relationship_types : Optional[List[str]], default=None - The relationships types used to select relationships for this algorithm run + Filter the graph using the given relationship types. Relationships with any of the given types will be included. node_labels : Optional[List[str]], default=None - The node labels used to select nodes for this algorithm run + Filter the graph using the given node labels. Nodes with any of the given labels will be included. sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None - Whether to log progress + Display progress logging. username : Optional[str], default=None - The username to attribute the procedure run to + As an administrator, run the algorithm as a different user, to access also their graphs. concurrency : Optional[Any], default=None - The number of concurrent threads + Number of CPU threads to use. job_id : Optional[Any], default=None - An identifier for the job + Identifier for the computation. consecutive_ids : Optional[bool], default=None - Flag to decide whether component identifiers are mapped into a consecutive id space + Whether to use consecutive IDs for components Returns ------- diff --git a/graphdatascience/procedure_surface/api/community/wcc_endpoints.py b/graphdatascience/procedure_surface/api/community/wcc_endpoints.py index 3a88969b0..64312a673 100644 --- a/graphdatascience/procedure_surface/api/community/wcc_endpoints.py +++ b/graphdatascience/procedure_surface/api/community/wcc_endpoints.py @@ -33,36 +33,39 @@ def mutate( relationship_weight_property: Optional[str] = None, ) -> WccMutateResult: """ - Executes the WCC algorithm and writes the results to the in-memory graph as node properties. 
+ Runs the Weakly Connected Components algorithm and stores the results in the graph catalog as a new node property. + + The Weakly Connected Components (WCC) algorithm finds sets of connected nodes in directed and undirected graphs where two nodes are connected if there exists a path between them. + In contrast to Strongly Connected Components (SCC), the direction of relationships on the path between two nodes is not considered. Parameters ---------- G : GraphV2 The graph to run the algorithm on mutate_property : str - The property name to store the component ID for each node + Name of the node property to store the results in. threshold : Optional[float], default=None The minimum required weight to consider a relationship during traversal relationship_types : Optional[List[str]], default=None - The relationships types used to select relationships for this algorithm run + Filter the graph using the given relationship types. Relationships with any of the given types will be included. node_labels : Optional[List[str]], default=None - The node labels used to select nodes for this algorithm run + Filter the graph using the given node labels. Nodes with any of the given labels will be included. sudo : Optional[bool], default=None - Override memory estimation limits + Disable the memory guard. log_progress : Optional[bool], default=None - Whether to log progress + Display progress logging. username : Optional[str], default=None - The username to attribute the procedure run to + As an administrator, run the algorithm as a different user, to access also their graphs. concurrency : Optional[Any], default=None - The number of concurrent threads + Number of CPU threads to use. job_id : Optional[Any], default=None - An identifier for the job + Identifier for the computation. 
seed_property : Optional[str], default=None - Defines node properties that are used as initial component identifiers + The property name that contains seed values consecutive_ids : Optional[bool], default=None - Flag to decide whether component identifiers are mapped into a consecutive id space + Whether to use consecutive IDs for components relationship_weight_property : Optional[str], default=None - The property name that contains weight + Name of the property to be used as weights. Returns -------