From f060bbf3dd57078717ea46369d9fef6f80e220d4 Mon Sep 17 00:00:00 2001 From: pierrick Date: Tue, 9 Jul 2024 15:24:11 +0200 Subject: [PATCH 1/8] created path lifting files post precommit --- .../graph2hypergraph/path_lifting.yaml | 3 + modules/transforms/data_transform.py | 2 + .../liftings/graph2hypergraph/path_lifting.py | 136 ++++++++++++++++++ 3 files changed, 141 insertions(+) create mode 100755 configs/transforms/liftings/graph2hypergraph/path_lifting.yaml create mode 100644 modules/transforms/liftings/graph2hypergraph/path_lifting.py diff --git a/configs/transforms/liftings/graph2hypergraph/path_lifting.yaml b/configs/transforms/liftings/graph2hypergraph/path_lifting.yaml new file mode 100755 index 00000000..2e7fee78 --- /dev/null +++ b/configs/transforms/liftings/graph2hypergraph/path_lifting.yaml @@ -0,0 +1,3 @@ +transform_type: 'lifting' +transform_name: "PathLifting" +feature_lifting: ProjectionSum \ No newline at end of file diff --git a/modules/transforms/data_transform.py b/modules/transforms/data_transform.py index 59253ecf..74a7ff2e 100755 --- a/modules/transforms/data_transform.py +++ b/modules/transforms/data_transform.py @@ -12,6 +12,7 @@ from modules.transforms.liftings.graph2hypergraph.knn_lifting import ( HypergraphKNNLifting, ) +from modules.transforms.liftings.graph2hypergraph.path_lifting import PathLifting from modules.transforms.liftings.graph2simplicial.clique_lifting import ( SimplicialCliqueLifting, ) @@ -31,6 +32,7 @@ "OneHotDegreeFeatures": OneHotDegreeFeatures, "NodeFeaturesToFloat": NodeFeaturesToFloat, "KeepOnlyConnectedComponent": KeepOnlyConnectedComponent, + "PathLifting": PathLifting, } diff --git a/modules/transforms/liftings/graph2hypergraph/path_lifting.py b/modules/transforms/liftings/graph2hypergraph/path_lifting.py new file mode 100644 index 00000000..f3a29ea2 --- /dev/null +++ b/modules/transforms/liftings/graph2hypergraph/path_lifting.py @@ -0,0 +1,136 @@ +"""A module for the PathLifting class.""" +import networkx as nx 
+import numpy as np +import torch +import torch_geometric + +from modules.transforms.liftings.graph2hypergraph.base import Graph2HypergraphLifting + + +class PathLifting(Graph2HypergraphLifting): + """Lifts graphs to hypergraph domain by considering paths between nodes.""" + + def __init__( + self, + source_nodes: list[int], + target_nodes: list[int], + lengths: list[int], + include_smaller_paths=False, + **kwargs, + ): + # guard clauses + if len(source_nodes) != len(lengths): + raise ValueError("source_nodes and lengths must have the same length") + if target_nodes is not None and len(target_nodes) != len(source_nodes): + raise ValueError( + "When target_nodes is not None, it must have the same length" + "as source_nodes" + ) + if len(source_nodes) == 0: + raise ValueError( + "source_nodes,target_nodes and lengths must have at least one element" + ) + + super().__init__(**kwargs) + self.source_nodes = source_nodes + self.target_nodes = target_nodes + self.lengths = lengths + self.include_smaller_paths = include_smaller_paths + + def find_hyperedges(self, data: torch_geometric.data.Data): + """Finds hyperedges from paths between nodes in a graph.""" + G = torch_geometric.utils.convert.to_networkx(data, to_undirected=True) + s_hyperedges = set() + + if self.target_nodes is None: # all paths stemming from source nodes only + for source, length in zip(self.source_nodes, self.lengths): + D, d_id2label, l_leafs = self.build_stemmingTree(G, source, length) + s = self.extract_hyperedgesFromStemmingTree(D, d_id2label, l_leafs) + s_hyperedges = s_hyperedges.union(s) + + else: # paths from source_nodes to target_nodes or from source nodes only + for source, target, length in zip( + self.source_nodes, self.target_nodes, self.lengths + ): + if target is None: + D, d_id2label, l_leafs = self.build_stemmingTree(G, source, length) + s = self.extract_hyperedgesFromStemmingTree(D, d_id2label, l_leafs) + s_hyperedges = s_hyperedges.union(s) + else: + paths = list( + 
nx.all_simple_paths( + G, source=source, target=target, cutoff=length + ) + ) + if not self.include_smaller_paths: + paths = [path for path in paths if len(path) - 1 == length] + s_hyperedges = s_hyperedges.union({frozenset(x) for x in paths}) + return s_hyperedges + + def lift_topology(self, data: torch_geometric.data.Data): + s_hyperedges = self.find_hyperedges(data) + indices = [[], []] + for edge_id, x in enumerate(s_hyperedges): + indices[1].extend([edge_id] * len(x)) + indices[0].extend(list(x)) + incidence = torch.sparse_coo_tensor( + indices, torch.ones(len(indices[0])), (len(data.x), len(s_hyperedges)) + ) + return { + "incidence_hyperedges": incidence, + "num_hyperedges": len(s_hyperedges), + "x_0": data.x, + } + + def build_stemmingTree(self, G, source_root, length, verbose=False): + """Creates a directed tree from a source node with paths of a given length.""" + d_id2label = {} + stack = [] + D = nx.DiGraph() + n_id = 0 + D.add_node(n_id) + d_id2label[n_id] = source_root + stack.append(n_id) + n_id += 1 + l_leafs = [] + while len(stack) > 0: + node = stack.pop() + neighbors = list(G.neighbors(d_id2label[node])) + visited_id = nx.shortest_path(D, source=0, target=node) + visited_labels = [d_id2label[i] for i in visited_id] + for neighbor in neighbors: + if neighbor not in visited_labels: + D.add_node(n_id) + d_id2label[n_id] = neighbor + if len(visited_labels) < length: + stack.append(n_id) + elif len(visited_labels) == length: + l_leafs.append(n_id) + else: + raise ValueError("Visited labels length is greater than length") + D.add_edge(node, n_id) + n_id += 1 + if verbose: + print("\nLoop Variables Summary:") + print("nodes:", node) + print("neighbors:", neighbors) + print("visited_id:", visited_id) + print("visited_labels:", visited_labels) + print("stack:", stack) + print("id2label:", d_id2label) + return D, d_id2label, l_leafs + + def extract_hyperedgesFromStemmingTree(self, D, d_id2label, l_leafs): + """From the root of the directed tree D, + 
extract hyperedges from the paths to the leafs.""" + a_paths = np.array( + [list(map(d_id2label.get, nx.shortest_path(D, 0, x))) for x in l_leafs] + ) + s_hyperedges = { + (frozenset(x)) for x in a_paths + } # set bc != paths can be same hpedge + if self.include_smaller_paths: + for i in range(a_paths.shape[1] - 1, 1, -1): + a_paths = np.unique(a_paths[:, :i], axis=0) + s_hyperedges = s_hyperedges.union({(frozenset(x)) for x in a_paths}) + return s_hyperedges From 11df3be940b2cb5382d1f0e7702447bde2331ba8 Mon Sep 17 00:00:00 2001 From: pierrick Date: Thu, 11 Jul 2024 15:20:57 +0200 Subject: [PATCH 2/8] added tuto notebook --- tutorials/graph2hypergraph/path_lifting.ipynb | 343 ++++++++++++++++++ 1 file changed, 343 insertions(+) create mode 100644 tutorials/graph2hypergraph/path_lifting.ipynb diff --git a/tutorials/graph2hypergraph/path_lifting.ipynb b/tutorials/graph2hypergraph/path_lifting.ipynb new file mode 100644 index 00000000..e3a29871 --- /dev/null +++ b/tutorials/graph2hypergraph/path_lifting.ipynb @@ -0,0 +1,343 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Graph-to-Hypergraph Path Lifting Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "This notebook shows how to import a dataset, with the desired lifting, and how to run a neural network using the loaded data.\n", + "\n", + "The notebook is divided into sections:\n", + "\n", + "- [Loading the dataset](#loading-the-dataset) loads the config files for the data, creates a a dataset object and visualizes it.\n", + "- [Loading and applying the lifting](#loading-and-applying-the-lifting) the desired lifting tranformation is configured and applied.\n", + "- [Create and run a simplicial nn model](#create-and-run-a-simplicial-nn-model) defines a simple neural network and simply runs a forward pass to check that everything is working as expected.\n", + "\n", + "***\n", + "***\n", + "\n", + "Note that for simplicity the notebook 
is setup to use a simple graph. However, there is a set of available datasets that you can play with.\n", + "\n", + "To switch to one of the available datasets, simply change the *dataset_name* variable to one of the following names:\n", + "\n", + "* cocitation_cora\n", + "* cocitation_citeseer\n", + "* cocitation_pubmed\n", + "* MUTAG\n", + "* NCI1\n", + "* NCI109\n", + "* PROTEINS_TU\n", + "* AQSOL\n", + "* ZINC\n", + "***" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Imports and utilities" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "# With this cell any imported module is reloaded before each cell execution\n", + "%load_ext autoreload\n", + "%autoreload 2\n", + "import sys\n", + "\n", + "sys.path.append(\"../..\")\n", + "from modules.data.load.loaders import GraphLoader\n", + "from modules.data.preprocess.preprocessor import PreProcessor\n", + "from modules.utils.utils import (\n", + " describe_data,\n", + " load_dataset_config,\n", + " load_model_config,\n", + " load_transform_config,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading the Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we just need to spicify the name of the available dataset that we want to load. 
First, the dataset config is read from the corresponding yaml file (located at `/configs/datasets/` directory), and then the data is loaded via the implemented `Loaders`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dataset configuration for manual_dataset:\n", + "\n", + "{'data_domain': 'graph',\n", + " 'data_type': 'toy_dataset',\n", + " 'data_name': 'manual',\n", + " 'data_dir': 'datasets/graph/toy_dataset',\n", + " 'num_features': 1,\n", + " 'num_classes': 2,\n", + " 'task': 'classification',\n", + " 'loss_type': 'cross_entropy',\n", + " 'monitor_metric': 'accuracy',\n", + " 'task_level': 'node'}\n" + ] + } + ], + "source": [ + "dataset_name = \"manual_dataset\"\n", + "dataset_config = load_dataset_config(dataset_name)\n", + "loader = GraphLoader(dataset_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dataset only contains 1 sample:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - Graph with 8 vertices and 13 edges.\n", + " - Features dimensions: [1, 0]\n", + " - There are 0 isolated nodes.\n", + "\n" + ] + } + ], + "source": [ + "dataset = loader.load()\n", + "describe_data(dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading and Applying the Lifting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this section we will instantiate the lifting we want to apply to the data. For this example the knn lifting was chosen. The algorithm takes the k nearest neighbors for each node and creates a hyperedge with them. The lifting is deterministic and creates a hypergraph with the same number of hyperedges as the number of nodes, and all the hyperedges have the same number of nodes in them. This lifting is based on the initial features of the nodes. The computational complexity of the algorithm is $O(nd+kn)$ [[1]](https://pubmed.ncbi.nlm.nih.gov/33211654/) where $n$ is the number of nodes in the graph, $d$ is the dimension of the feature space and $k$ is fixed.\n", + "\n", + "***\n", + "[[1]](https://pubmed.ncbi.nlm.nih.gov/33211654/) Gao, Y., Zhang, Z., Lin, H., Zhao, X., Du, S., & Zou, C. (2020). Hypergraph learning: Methods and\n", + "practices. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(5), 2548-2566.\n", + "***\n", + "\n", + "For hypergraphs creating a lifting involves creating the `incidence_hyperedges` matrix.\n", + "\n", + "Similarly to before, we can specify the transformation we want to apply through its type and id --the correxponding config files located at `/configs/transforms`. 
\n", + "\n", + "Note that the *tranform_config* dictionary generated below can contain a sequence of tranforms if it is needed.\n", + "\n", + "This can also be used to explore liftings from one topological domain to another, for example using two liftings it is possible to achieve a sequence such as: graph -> simplicial complex -> hypergraph. " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Transform configuration for graph2hypergraph/path_lifting:\n", + "\n", + "{'transform_type': 'lifting',\n", + " 'transform_name': 'PathLifting',\n", + " 'feature_lifting': 'ProjectionSum'}\n" + ] + } + ], + "source": [ + "# Define transformation type and id\n", + "transform_type = \"liftings\"\n", + "# If the transform is a topological lifting, it should include both the type of the lifting and the identifier\n", + "transform_id = \"graph2hypergraph/path_lifting\"\n", + "\n", + "# Read yaml file\n", + "transform_config = {\n", + " \"lifting\": load_transform_config(transform_type, transform_id)\n", + " # other transforms (e.g. data manipulations, feature liftings) can be added here\n", + "}\n", + "# add required arguments\n", + "transform_config[\"lifting\"][\"source_nodes\"] = [1]\n", + "transform_config[\"lifting\"][\"target_nodes\"] = [2]\n", + "transform_config[\"lifting\"][\"lengths\"] = [2]\n", + "\n", + "# select a value for optional argument\n", + "transform_config[\"lifting\"][\"include_smaller_paths\"] = False" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transform parameters are the same, using existing data_dir: /home/pierrick/local/challenge-icml-2024/datasets/graph/toy_dataset/manual/lifting/480906646\n", + "\n", + "Dataset only contains 1 sample:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - Hypergraph with 8 vertices and 2 hyperedges.\n", + " - The nodes have feature dimensions 1.\n", + " - The hyperedges have feature dimensions 1.\n", + "\n" + ] + } + ], + "source": [ + "lifted_dataset = PreProcessor(dataset, transform_config, loader.data_dir)\n", + "describe_data(lifted_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create and Run an hypergraph NN Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this section a simple model is created to test that the used lifting works as intended. In this case the model uses the `incidence_hyperedges` matrix so the lifting should make sure to add it to the data." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Model configuration for hypergraph UNIGCN:\n", + "\n", + "{'in_channels': None,\n", + " 'hidden_channels': 32,\n", + " 'out_channels': None,\n", + " 'n_layers': 2}\n" + ] + } + ], + "source": [ + "from modules.models.hypergraph.unigcn import UniGCNModel\n", + "\n", + "model_type = \"hypergraph\"\n", + "model_id = \"unigcn\"\n", + "model_config = load_model_config(model_type, model_id)\n", + "\n", + "model = UniGCNModel(model_config, dataset_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "y_hat = model(lifted_dataset.get(0))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "topox", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + 
"nbformat": 4, + "nbformat_minor": 2 +} From df87a7094eda4400cb69332e6d1829cf9c42af35 Mon Sep 17 00:00:00 2001 From: pierrick Date: Thu, 11 Jul 2024 18:16:25 +0200 Subject: [PATCH 3/8] added tests --- .../graph2hypergraph/test_path_lifting.py | 154 ++++++++++++++++++ tutorials/graph2hypergraph/path_lifting.ipynb | 42 ++--- 2 files changed, 164 insertions(+), 32 deletions(-) create mode 100644 test/transforms/liftings/graph2hypergraph/test_path_lifting.py diff --git a/test/transforms/liftings/graph2hypergraph/test_path_lifting.py b/test/transforms/liftings/graph2hypergraph/test_path_lifting.py new file mode 100644 index 00000000..a8dd9cc3 --- /dev/null +++ b/test/transforms/liftings/graph2hypergraph/test_path_lifting.py @@ -0,0 +1,154 @@ +"""Test the path lifting module.""" + +import numpy as np + +from modules.data.load.loaders import GraphLoader +from modules.transforms.liftings.graph2hypergraph.path_lifting import PathLifting +from modules.utils.utils import load_dataset_config + + +class TestHypergraphPathLifting: + """Test the PathLifting class.""" + + def setup_method(self): + """Initialise the PathLifting class.""" + dataset_config = load_dataset_config("manual_dataset") + loader = GraphLoader(dataset_config) + self.dataset = loader.load() + self.data = self.dataset._data + + def test_true(self): + """Naive test to check if the test is running.""" + assert True + + # def test_false(self): + # """Naive test to check if the test is running.""" + # assert False + + def test_1(self): + """Verifies setup_method is working.""" + assert self.dataset is not None + + def test_2(self): + """test: no target node for one source node returns something""" + source_nodes = [0, 2] + target_nodes = [1, None] + lengths = [2, 2] + include_smaller_paths = True + path_lifting = PathLifting( + source_nodes, + target_nodes, + lengths, + include_smaller_paths=include_smaller_paths, + ) + res = path_lifting.find_hyperedges(self.data) + res_expected = [ + [0, 1], + [0, 1, 2], + [0, 4, 
1], + [2, 4], + [2, 1], + [2, 0], + [2, 7], + [2, 5], + [2, 3], + [2, 1, 4], + [2, 4, 0], + [2, 1, 0], + [2, 0, 7], + [2, 5, 7], + [2, 3, 6], + [2, 5, 6], + # [], + ] + assert {frozenset(x) for x in res_expected} == res + + def test_3(self): + """test: include_smaller_paths=False""" + source_nodes = [0] + target_nodes = [1] + lengths = [2] + include_smaller_paths = False + res = PathLifting( + source_nodes, + target_nodes, + lengths, + include_smaller_paths=include_smaller_paths, + ).find_hyperedges(self.data) + assert not frozenset({0, 1}) in res + + def test_4(self): + """test: include_smaller_paths=True""" + source_nodes = [0] + target_nodes = [1] + lengths = [2] + include_smaller_paths = True + res = PathLifting( + source_nodes, + target_nodes, + lengths, + include_smaller_paths=include_smaller_paths, + ).find_hyperedges(self.data) + assert frozenset({0, 1}) in res + + def test_5(self): + """test: when include_smaller_paths=False all paths have the length specified""" + source_nodes = [0] + target_nodes = [1] + include_smaller_paths = False + for k in range(1, 5): + lengths = [k] + res = PathLifting( + source_nodes, + target_nodes, + lengths, + include_smaller_paths=include_smaller_paths, + ).find_hyperedges(self.data) + assert np.array([len(x) - 1 == k for x in res]).all() + + def test_6(self): + """test: no target node global returns something""" + source_nodes = [0, 1] + target_nodes = None + lengths = [2, 2] + include_smaller_paths = False + res = PathLifting( + source_nodes, + target_nodes, + lengths, + include_smaller_paths=include_smaller_paths, + ).find_hyperedges(self.data) + assert len(res) > 0 + + def test_7(self): + """test: every hyperedge contains the source and target nodes when specified""" + a = np.random.choice(np.arange(len(self.data.x)), 2, replace=False) + source_nodes = [a[0]] + target_nodes = [a[1]] + lengths = [np.random.randint(1, 5)] + include_smaller_paths = False + res = PathLifting( + source_nodes, + target_nodes, + lengths, + 
include_smaller_paths=include_smaller_paths, + ).find_hyperedges(self.data) + if len(res) > 0: + assert ( + np.array([source_nodes[0] in x for x in res]).all() + and np.array([target_nodes[0] in x for x in res]).all() + ) + + def test_8(self): + """test: no target node for one source node returns something""" + source_nodes = [0, 2] + target_nodes = [1, None] + lengths = [2, 2] + include_smaller_paths = False + res = PathLifting( + source_nodes, + target_nodes, + lengths, + include_smaller_paths=include_smaller_paths, + ).find_hyperedges(self.data) + assert len(res) > 0 diff --git a/tutorials/graph2hypergraph/path_lifting.ipynb b/tutorials/graph2hypergraph/path_lifting.ipynb index e3a29871..5269dada 100644 --- a/tutorials/graph2hypergraph/path_lifting.ipynb +++ b/tutorials/graph2hypergraph/path_lifting.ipynb @@ -48,18 +48,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "# With this cell any imported module is reloaded before each cell execution\n", "%load_ext autoreload\n", @@ -88,12 +79,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here we just need to spicify the name of the available dataset that we want to load. First, the dataset config is read from the corresponding yaml file (located at `/configs/datasets/` directory), and then the data is loaded via the implemented `Loaders`.\n" + "Here we just need to specify the name of the available dataset that we want to load. First, the dataset config is read from the corresponding yaml file (located at `/configs/datasets/` directory), and then the data is loaded via the implemented `Loaders`." 
] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -124,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -172,25 +163,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this section we will instantiate the lifting we want to apply to the data. For this example the knn lifting was chosen. The algorithm takes the k nearest neighbors for each node and creates a hyperedge with them. The lifting is deterministic and creates a hypergraph with the same number of hyperedges as the number of nodes, and all the hyperedges have the same number of nodes in them. This lifting is based on the initial features of the nodes. The computational complexity of the algorithm is $O(nd+kn)$ [[1]](https://pubmed.ncbi.nlm.nih.gov/33211654/) where $n$ is the number of nodes in the graph, $d$ is the dimension of the feature space and $k$ is fixed.\n", - "\n", - "***\n", - "[[1]](https://pubmed.ncbi.nlm.nih.gov/33211654/) Gao, Y., Zhang, Z., Lin, H., Zhao, X., Du, S., & Zou, C. (2020). Hypergraph learning: Methods and\n", - "practices. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(5), 2548-2566.\n", - "***\n", - "\n", - "For hypergraphs creating a lifting involves creating the `incidence_hyperedges` matrix.\n", - "\n", - "Similarly to before, we can specify the transformation we want to apply through its type and id --the correxponding config files located at `/configs/transforms`. \n", - "\n", - "Note that the *tranform_config* dictionary generated below can contain a sequence of tranforms if it is needed.\n", - "\n", - "This can also be used to explore liftings from one topological domain to another, for example using two liftings it is possible to achieve a sequence such as: graph -> simplicial complex -> hypergraph. " + "In this section we will instantiate the lifting we want to apply to the data." 
] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -228,7 +206,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -282,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -311,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ From 5676e4e7ac5b8325a53b5ee24a112c2c9292a973 Mon Sep 17 00:00:00 2001 From: pierrick Date: Thu, 11 Jul 2024 18:17:31 +0200 Subject: [PATCH 4/8] fix bug in plot_manual_graph --- modules/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/utils/utils.py b/modules/utils/utils.py index 1dfcdc2e..54e955d5 100644 --- a/modules/utils/utils.py +++ b/modules/utils/utils.py @@ -233,7 +233,7 @@ def sort_vertices_ccw(vertices): n_hyperedges = incidence.shape[1] vertices += [i + n_vertices for i in range(n_hyperedges)] indices = incidence.indices() - edges = np.array([indices[1].numpy(), indices[0].numpy() + n_vertices]).T + edges = np.array([indices[0].numpy(), indices[1].numpy() + n_vertices]).T pos_n = [[i, 0] for i in range(n_vertices)] pos_he = [[i, 1] for i in range(n_hyperedges)] pos = pos_n + pos_he From cb007ef129285ae459e5f3e698bf87fc3a9c3336 Mon Sep 17 00:00:00 2001 From: pierrick Date: Fri, 12 Jul 2024 00:36:58 +0200 Subject: [PATCH 5/8] refactoring for ruff check --- .../transforms/liftings/graph2hypergraph/path_lifting.py | 4 ++-- .../liftings/graph2hypergraph/test_path_lifting.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/transforms/liftings/graph2hypergraph/path_lifting.py b/modules/transforms/liftings/graph2hypergraph/path_lifting.py index f3a29ea2..634a182e 100644 --- a/modules/transforms/liftings/graph2hypergraph/path_lifting.py +++ b/modules/transforms/liftings/graph2hypergraph/path_lifting.py 
@@ -43,14 +43,14 @@ def find_hyperedges(self, data: torch_geometric.data.Data): s_hyperedges = set() if self.target_nodes is None: # all paths stemming from source nodes only - for source, length in zip(self.source_nodes, self.lengths): + for source, length in zip(self.source_nodes, self.lengths, strict=True): D, d_id2label, l_leafs = self.build_stemmingTree(G, source, length) s = self.extract_hyperedgesFromStemmingTree(D, d_id2label, l_leafs) s_hyperedges = s_hyperedges.union(s) else: # paths from source_nodes to target_nodes or from source nodes only for source, target, length in zip( - self.source_nodes, self.target_nodes, self.lengths + self.source_nodes, self.target_nodes, self.lengths, strict=True ): if target is None: D, d_id2label, l_leafs = self.build_stemmingTree(G, source, length) diff --git a/test/transforms/liftings/graph2hypergraph/test_path_lifting.py b/test/transforms/liftings/graph2hypergraph/test_path_lifting.py index a8dd9cc3..56429eb0 100644 --- a/test/transforms/liftings/graph2hypergraph/test_path_lifting.py +++ b/test/transforms/liftings/graph2hypergraph/test_path_lifting.py @@ -75,7 +75,7 @@ def test_3(self): lengths, include_smaller_paths=include_smaller_paths, ).find_hyperedges(self.data) - assert not frozenset({0, 1}) in res + assert frozenset({0, 1}) not in res def test_4(self): """test: include_smaller_paths=True""" @@ -122,10 +122,12 @@ def test_6(self): def test_7(self): """test: every hyperedge contains the source and target nodes when specified""" - a = np.random.choice(np.arange(len(self.data.x)), 2, replace=False) + a = np.random.default_rng().choice( + np.arange(len(self.data.x)), 2, replace=False + ) source_nodes = [a[0]] target_nodes = [a[1]] - lengths = [np.random.randint(1, 5)] + lengths = [np.random.default_rng().integers(1, 5)] include_smaller_paths = False res = PathLifting( source_nodes, From b26701f125c27fd027fe65cd78c377290e8b7209 Mon Sep 17 00:00:00 2001 From: pierrick Date: Fri, 12 Jul 2024 15:41:06 +0200 Subject: 
[PATCH 6/8] added default behavior of path lifting --- .../liftings/graph2hypergraph/path_lifting.py | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/modules/transforms/liftings/graph2hypergraph/path_lifting.py b/modules/transforms/liftings/graph2hypergraph/path_lifting.py index 634a182e..0f1811c2 100644 --- a/modules/transforms/liftings/graph2hypergraph/path_lifting.py +++ b/modules/transforms/liftings/graph2hypergraph/path_lifting.py @@ -12,24 +12,24 @@ class PathLifting(Graph2HypergraphLifting): def __init__( self, - source_nodes: list[int], - target_nodes: list[int], - lengths: list[int], + source_nodes: list[int] = None, + target_nodes: list[int] = None, + lengths: list[int] = None, include_smaller_paths=False, **kwargs, ): # guard clauses - if len(source_nodes) != len(lengths): + if ( + lengths is not None + and source_nodes is not None + and len(source_nodes) != len(lengths) + ): raise ValueError("source_nodes and lengths must have the same length") if target_nodes is not None and len(target_nodes) != len(source_nodes): raise ValueError( "When target_nodes is not None, it must have the same length" "as source_nodes" ) - if len(source_nodes) == 0: - raise ValueError( - "source_nodes,target_nodes and lengths must have at least one element" - ) super().__init__(**kwargs) self.source_nodes = source_nodes @@ -37,6 +37,13 @@ def __init__( self.lengths = lengths self.include_smaller_paths = include_smaller_paths + def value_defaults(self, data: torch_geometric.data.Data): + """Sets default values for source_nodes and lengths if not provided.""" + if self.source_nodes is None: + self.source_nodes = np.arange(data.num_nodes) + if self.lengths is None: + self.lengths = [2] * len(self.source_nodes) + def find_hyperedges(self, data: torch_geometric.data.Data): """Finds hyperedges from paths between nodes in a graph.""" G = torch_geometric.utils.convert.to_networkx(data, to_undirected=True) @@ -68,6 +75,8 @@ def find_hyperedges(self, 
data: torch_geometric.data.Data): return s_hyperedges def lift_topology(self, data: torch_geometric.data.Data): + if self.source_nodes is None or self.lengths is None: + self.value_defaults(data) s_hyperedges = self.find_hyperedges(data) indices = [[], []] for edge_id, x in enumerate(s_hyperedges): From 4754e6c28f1a632da3b0e7999103790b5addabbd Mon Sep 17 00:00:00 2001 From: pierrick Date: Fri, 12 Jul 2024 17:54:06 +0200 Subject: [PATCH 7/8] refactor for ruff check --- .../transforms/liftings/graph2hypergraph/path_lifting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/transforms/liftings/graph2hypergraph/path_lifting.py b/modules/transforms/liftings/graph2hypergraph/path_lifting.py index 0f1811c2..75e0d43a 100644 --- a/modules/transforms/liftings/graph2hypergraph/path_lifting.py +++ b/modules/transforms/liftings/graph2hypergraph/path_lifting.py @@ -12,9 +12,9 @@ class PathLifting(Graph2HypergraphLifting): def __init__( self, - source_nodes: list[int] = None, - target_nodes: list[int] = None, - lengths: list[int] = None, + source_nodes: list[int] | None = None, + target_nodes: list[int] | None = None, + lengths: list[int] | None = None, include_smaller_paths=False, **kwargs, ): From 541045313257266692a318680715cb9ad1dddd21 Mon Sep 17 00:00:00 2001 From: pierrick Date: Fri, 12 Jul 2024 23:58:05 +0200 Subject: [PATCH 8/8] improved code readability --- .../liftings/graph2hypergraph/path_lifting.py | 70 ++++- .../graph2hypergraph/test_path_lifting.py | 14 +- tutorials/graph2hypergraph/path_lifting.ipynb | 286 +++++++++++++++++- 3 files changed, 338 insertions(+), 32 deletions(-) diff --git a/modules/transforms/liftings/graph2hypergraph/path_lifting.py b/modules/transforms/liftings/graph2hypergraph/path_lifting.py index 0f1811c2..59061776 100644 --- a/modules/transforms/liftings/graph2hypergraph/path_lifting.py +++ b/modules/transforms/liftings/graph2hypergraph/path_lifting.py @@ -18,6 +18,22 @@ def __init__( 
include_smaller_paths=False, **kwargs, ): + """Init function + + Args: + source_nodes (list[int], optional): a list of nodes from which to start the paths. + Defaults to None in __init__; a default is then set in _value_defaults(). + target_nodes (list[int], optional): a list of nodes where the paths must end. + Defaults to None. + lengths (list[int], optional): a list of path lengths. + Defaults to None in __init__; a default is then set in _value_defaults(). + include_smaller_paths (bool, optional): whether or not to include paths from source + to target smaller than the length specified. Defaults to False. + + Raises: + ValueError: if source_nodes and lengths are both provided but differ in length + ValueError: if target_nodes is provided but differs in length from source_nodes + """ # guard clauses if ( lengths is not None @@ -37,22 +53,22 @@ def __init__( self.lengths = lengths self.include_smaller_paths = include_smaller_paths - def value_defaults(self, data: torch_geometric.data.Data): + def _value_defaults(self, data: torch_geometric.data.Data): """Sets default values for source_nodes and lengths if not provided.""" if self.source_nodes is None: self.source_nodes = np.arange(data.num_nodes) if self.lengths is None: self.lengths = [2] * len(self.source_nodes) - def find_hyperedges(self, data: torch_geometric.data.Data): + def _find_hyperedges(self, data: torch_geometric.data.Data): """Finds hyperedges from paths between nodes in a graph.""" G = torch_geometric.utils.convert.to_networkx(data, to_undirected=True) s_hyperedges = set() if self.target_nodes is None: # all paths stemming from source nodes only for source, length in zip(self.source_nodes, self.lengths, strict=True): - D, d_id2label, l_leafs = self.build_stemmingTree(G, source, length) - s = self.extract_hyperedgesFromStemmingTree(D, d_id2label, l_leafs) + D, d_id2label, l_leafs = self._build_stemmingTree(G, source, length) + s = self._extract_hyperedgesFromStemmingTree(D, d_id2label, l_leafs) s_hyperedges
= s_hyperedges.union(s) else: # paths from source_nodes to target_nodes or from source nodes only @@ -60,8 +76,8 @@ def find_hyperedges(self, data: torch_geometric.data.Data): self.source_nodes, self.target_nodes, self.lengths, strict=True ): if target is None: - D, d_id2label, l_leafs = self.build_stemmingTree(G, source, length) - s = self.extract_hyperedgesFromStemmingTree(D, d_id2label, l_leafs) + D, d_id2label, l_leafs = self._build_stemmingTree(G, source, length) + s = self._extract_hyperedgesFromStemmingTree(D, d_id2label, l_leafs) s_hyperedges = s_hyperedges.union(s) else: paths = list( @@ -75,9 +91,10 @@ def find_hyperedges(self, data: torch_geometric.data.Data): return s_hyperedges def lift_topology(self, data: torch_geometric.data.Data): + """Lifts the graph data to a hypergraph by considering paths between nodes.""" if self.source_nodes is None or self.lengths is None: - self.value_defaults(data) - s_hyperedges = self.find_hyperedges(data) + self._value_defaults(data) + s_hyperedges = self._find_hyperedges(data) indices = [[], []] for edge_id, x in enumerate(s_hyperedges): indices[1].extend([edge_id] * len(x)) @@ -91,8 +108,22 @@ def lift_topology(self, data: torch_geometric.data.Data): "x_0": data.x, } - def build_stemmingTree(self, G, source_root, length, verbose=False): - """Creates a directed tree from a source node with paths of a given length.""" + def _build_stemmingTree(self, G, source_root, length, verbose=False): + """Creates a directed tree from a source node with paths of a given length. + This directed tree has as root the source node and paths stemming from it. + This tree is used to extract hyperedges from paths to leafs. + + Args: + G (networkx.classes.graph.Graph): the original graph + source_root (int): the source node from which to start the paths + length (int): the length of the paths + verbose (bool, optional): Defaults to False. 
+ + Returns: + D (networkx.classes.graph.DiGraph): a directed tree stemming from source_root + d_id2label (dict): a dictionary mapping node ids to node labels + l_leafs (list): a list of leaf nodes ids + """ d_id2label = {} stack = [] D = nx.DiGraph() @@ -115,11 +146,11 @@ def build_stemmingTree(self, G, source_root, length, verbose=False): stack.append(n_id) elif len(visited_labels) == length: l_leafs.append(n_id) - else: + else: # security check raise ValueError("Visited labels length is greater than length") D.add_edge(node, n_id) n_id += 1 - if verbose: + if verbose: # output information during the process print("\nLoop Variables Summary:") print("nodes:", node) print("neighbors:", neighbors) @@ -129,15 +160,24 @@ def build_stemmingTree(self, G, source_root, length, verbose=False): print("id2label:", d_id2label) return D, d_id2label, l_leafs - def extract_hyperedgesFromStemmingTree(self, D, d_id2label, l_leafs): + def _extract_hyperedgesFromStemmingTree(self, D, d_id2label, l_leafs): """From the root of the directed tree D, - extract hyperedges from the paths to the leafs.""" + extract hyperedges from the paths to the leafs. 
+ + Args: + D (networkx.classes.graph.DiGraph): a directed tree stemming from source_root + d_id2label (dict): a dictionary mapping node ids to node labels + l_leafs (list): a list of leaf node ids + + Returns: + set: the extracted hyperedges, each a frozenset of node labels + """ a_paths = np.array( [list(map(d_id2label.get, nx.shortest_path(D, 0, x))) for x in l_leafs] ) s_hyperedges = { (frozenset(x)) for x in a_paths - } # set bc != paths can be same hpedge + } # a set, because different paths can yield the same hyperedge if self.include_smaller_paths: for i in range(a_paths.shape[1] - 1, 1, -1): a_paths = np.unique(a_paths[:, :i], axis=0) diff --git a/test/transforms/liftings/graph2hypergraph/test_path_lifting.py b/test/transforms/liftings/graph2hypergraph/test_path_lifting.py index 56429eb0..ad64229e 100644 --- a/test/transforms/liftings/graph2hypergraph/test_path_lifting.py +++ b/test/transforms/liftings/graph2hypergraph/test_path_lifting.py @@ -41,7 +41,7 @@ def test_2(self): lengths, include_smaller_paths=include_smaller_paths, ) - res = path_lifting.find_hyperedges(self.data) + res = path_lifting._find_hyperedges(self.data) res_expected = [ [0, 1], [0, 1, 2], @@ -74,7 +74,7 @@ def test_3(self): target_nodes, lengths, include_smaller_paths=include_smaller_paths, - ).find_hyperedges(self.data) + )._find_hyperedges(self.data) assert frozenset({0, 1}) not in res def test_4(self): @@ -88,7 +88,7 @@ def test_4(self): target_nodes, lengths, include_smaller_paths=include_smaller_paths, - ).find_hyperedges(self.data) + )._find_hyperedges(self.data) assert frozenset({0, 1}) in res def test_5(self): @@ -103,7 +103,7 @@ def test_5(self): target_nodes, lengths, include_smaller_paths=include_smaller_paths, - ).find_hyperedges(self.data) + )._find_hyperedges(self.data) assert np.array([len(x) - 1 == k for x in res]).all() def test_6(self): @@ -117,7 +117,7 @@ def test_6(self): target_nodes, lengths, include_smaller_paths=include_smaller_paths, - ).find_hyperedges(self.data) + )._find_hyperedges(self.data) assert
len(res) > 0 def test_7(self): @@ -134,7 +134,7 @@ def test_7(self): target_nodes, lengths, include_smaller_paths=include_smaller_paths, - ).find_hyperedges(self.data) + )._find_hyperedges(self.data) if len(res) > 0: assert ( np.array([source_nodes[0] in x for x in res]).all() @@ -152,5 +152,5 @@ def test_8(self): target_nodes, lengths, include_smaller_paths=include_smaller_paths, - ).find_hyperedges(self.data) + )._find_hyperedges(self.data) assert len(res) > 0 diff --git a/tutorials/graph2hypergraph/path_lifting.ipynb b/tutorials/graph2hypergraph/path_lifting.ipynb index 5269dada..610a7184 100644 --- a/tutorials/graph2hypergraph/path_lifting.ipynb +++ b/tutorials/graph2hypergraph/path_lifting.ipynb @@ -156,14 +156,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Loading and Applying the Lifting" + "## Loading and Applying the Lifting\n", + "In this section we will instantiate the lifting we want to apply to the data.\n", + "***\n", + "**Description of the lifting**\n", + "\n", + "This lifting is based on paths. Intuitively, we specify which paths are of interest by giving for each path a source node, a length and optionally a target node.\n", + "Formally the lifting is parametrized by a set of tuples of the form: $$(u, v, n) \\in V\\times V\\times\\mathbb N^{*}$$\n", + "where $V$ is the set of vertices and $n$ is the length of the paths between nodes $u$ and $v$. The lifting is then a family of such tuples $(u_i, v_i, n_i)$.\n", + "The graph is treated as undirected. Features are lifted using the standard sum projection according to the incidence matrix.\n", + "***" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In this section we will instantiate the lifting we want to apply to the data." + "### Example 1: basic\n", + "Here we define a path lifting where paths are between node 1 (source) and node 2 (target) and have exactly length 2. This is specified by the parameter ```include_smaller_paths```. 
There are two hyperedges in the result, corresponding to the paths (v1, v0, v2) and (v1, v4, v2)." ] }, { @@ -195,13 +205,10 @@ " \"lifting\": load_transform_config(transform_type, transform_id)\n", " # other transforms (e.g. data manipulations, feature liftings) can be added here\n", "}\n", - "# add required arguments\n", + "# parametrize the transform\n", "transform_config[\"lifting\"][\"source_nodes\"] = [1]\n", "transform_config[\"lifting\"][\"target_nodes\"] = [2]\n", - "transform_config[\"lifting\"][\"lengths\"] = [2]\n", - "\n", - "# select a value for optional argument\n", - "transform_config[\"lifting\"][\"include_smaller_paths\"] = False" + "transform_config[\"lifting\"][\"lengths\"] = [2]" ] }, { @@ -213,11 +220,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "Transform parameters are the same, using existing data_dir: /home/pierrick/local/challenge-icml-2024/datasets/graph/toy_dataset/manual/lifting/480906646\n", "\n", "Dataset only contains 1 sample:\n" ] }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing...\n", + "Done!\n" + ] + }, { "data": { "image/png": "", @@ -244,6 +258,258 @@ "describe_data(lifted_dataset)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 2: adding shorter paths\n", + "Here we define the same path lifting as before, but we also include all paths shorter than the specified length, as indicated by ```include_smaller_paths=True```. Indeed, we see that another hyperedge is added."
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Transform configuration for graph2hypergraph/path_lifting:\n", + "\n", + "{'transform_type': 'lifting',\n", + " 'transform_name': 'PathLifting',\n", + " 'feature_lifting': 'ProjectionSum'}\n" + ] + } + ], + "source": [ + "# Define transformation type and id\n", + "transform_type = \"liftings\"\n", + "# If the transform is a topological lifting, it should include both the type of the lifting and the identifier\n", + "transform_id = \"graph2hypergraph/path_lifting\"\n", + "\n", + "# Read yaml file\n", + "transform_config = {\n", + " \"lifting\": load_transform_config(transform_type, transform_id)\n", + " # other transforms (e.g. data manipulations, feature liftings) can be added here\n", + "}\n", + "# parametrize the transform\n", + "transform_config[\"lifting\"][\"source_nodes\"] = [1]\n", + "transform_config[\"lifting\"][\"target_nodes\"] = [2]\n", + "transform_config[\"lifting\"][\"lengths\"] = [2]\n", + "transform_config[\"lifting\"][\"include_smaller_paths\"] = True" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transform parameters are the same, using existing data_dir: /home/pierrick/local/challenge-icml-2024/datasets/graph/toy_dataset/manual/lifting/2187712706\n", + "\n", + "Dataset only contains 1 sample:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - Hypergraph with 8 vertices and 3 hyperedges.\n", + " - The nodes have feature dimensions 1.\n", + " - The hyperedges have feature dimensions 1.\n", + "\n" + ] + } + ], + "source": [ + "lifted_dataset = PreProcessor(dataset, transform_config, loader.data_dir)\n", + "describe_data(lifted_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 3: multiple sources and targets\n", + "We can ask for paths stemming from multiple nodes by passing multiple elements in the ```source_nodes``` list. The ```target_nodes``` and ```lengths``` lists must contain as many elements as ```source_nodes```. When an element of ```target_nodes``` is None, there is no target for the corresponding source node and all paths stemming from that source are kept, regardless of where they end." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Transform configuration for graph2hypergraph/path_lifting:\n", + "\n", + "{'transform_type': 'lifting',\n", + " 'transform_name': 'PathLifting',\n", + " 'feature_lifting': 'ProjectionSum'}\n" + ] + } + ], + "source": [ + "# Define transformation type and id\n", + "transform_type = \"liftings\"\n", + "# If the transform is a topological lifting, it should include both the type of the lifting and the identifier\n", + "transform_id = \"graph2hypergraph/path_lifting\"\n", + "\n", + "# Read yaml file\n", + "transform_config = {\n", + " \"lifting\": load_transform_config(transform_type, transform_id)\n", + " # other transforms (e.g.
data manipulations, feature liftings) can be added here\n", + "}\n", + "# parametrize the transform\n", + "transform_config[\"lifting\"][\"source_nodes\"] = [1, 5, 0]\n", + "transform_config[\"lifting\"][\"target_nodes\"] = [2, 6, None]\n", + "transform_config[\"lifting\"][\"lengths\"] = [2, 3, 1]\n", + "transform_config[\"lifting\"][\"include_smaller_paths\"] = False" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transform parameters are the same, using existing data_dir: /home/pierrick/local/challenge-icml-2024/datasets/graph/toy_dataset/manual/lifting/3803470566\n", + "\n", + "Dataset only contains 1 sample:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - Hypergraph with 8 vertices and 7 hyperedges.\n", + " - The nodes have feature dimensions 1.\n", + " - The hyperedges have feature dimensions 1.\n", + "\n" + ] + } + ], + "source": [ + "lifted_dataset = PreProcessor(dataset, transform_config, loader.data_dir)\n", + "describe_data(lifted_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 4: randomized lifting\n", + "The hyperedge generation can be randomized, here we draw paths between 3 pairs of nodes." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Transform configuration for graph2hypergraph/path_lifting:\n", + "\n", + "{'transform_type': 'lifting',\n", + " 'transform_name': 'PathLifting',\n", + " 'feature_lifting': 'ProjectionSum'}\n", + "\n", + "Dataset only contains 1 sample:\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing...\n", + "Done!\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - Hypergraph with 8 vertices and 6 hyperedges.\n", + " - The nodes have feature dimensions 1.\n", + " - The hyperedges have feature dimensions 1.\n", + "\n" + ] + } + ], + "source": [ + "import torch\n", + "\n", + "# Define transformation type and id\n", + "transform_type = \"liftings\"\n", + "# If the transform is a topological lifting, it should include both the type of the lifting and the identifier\n", + "transform_id = \"graph2hypergraph/path_lifting\"\n", + "\n", + "# Read yaml file\n", + "transform_config = {\n", + " \"lifting\": load_transform_config(transform_type, transform_id)\n", + " # other transforms (e.g. data manipulations, feature liftings) can be added here\n", + "}\n", + "# parametrize the transform\n", + "transform_config[\"lifting\"][\"source_nodes\"] = torch.randint(\n", + " 0, dataset._data.num_nodes, (3,)\n", + ").tolist()\n", + "transform_config[\"lifting\"][\"target_nodes\"] = torch.randint(\n", + " 0, dataset._data.num_nodes, (3,)\n", + ").tolist()\n", + "transform_config[\"lifting\"][\"lengths\"] = torch.randint(\n", + " 0, dataset._data.num_nodes, (3,)\n", + ").tolist()\n", + "transform_config[\"lifting\"][\"include_smaller_paths\"] = False\n", + "lifted_dataset = PreProcessor(dataset, transform_config, loader.data_dir)\n", + "describe_data(lifted_dataset)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -260,7 +526,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -289,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [