From b6e7c757f3999742338568f25846fe47d4948455 Mon Sep 17 00:00:00 2001
From: Lukáš Zahradník
Date: Sun, 26 Nov 2023 21:40:59 +0100
Subject: [PATCH] Introduce Sample as entry for Dataset

---
 benchmarks/pyneuralogic_benchmark.py          |  2 +-
 examples/datasets/horses.py                   | 32 +++----
 .../multiple_examples_no_order_trains.py      | 29 +++---
 examples/datasets/multiple_examples_trains.py | 28 +++---
 examples/datasets/naive_trains.py             | 54 ++++++------
 examples/datasets/naive_xor.py                | 16 ++--
 examples/datasets/vectorized_xor.py           | 12 +--
 neuralogic/core/builder/__init__.py           |  4 +-
 neuralogic/core/builder/builder.py            | 18 ++--
 neuralogic/core/builder/components.py         | 28 +-----
 neuralogic/core/builder/dataset_builder.py    | 52 +++++------
 neuralogic/dataset/__init__.py                |  7 +-
 neuralogic/dataset/base.py                    | 11 +--
 neuralogic/dataset/csv.py                     | 29 +++---
 neuralogic/dataset/db.py                      |  4 +-
 neuralogic/dataset/logic.py                   | 88 ++++++++++++-------
 neuralogic/dataset/tensor.py                  | 11 ++-
 .../inference/evaluation_inference_engine.py  | 19 ++--
 neuralogic/nn/base.py                         |  6 --
 neuralogic/nn/torch_function.py               |  4 +-
 tests/test_csv_datasets.py                    | 88 ++++++++---------
 tests/test_evaluation_inference_engine.py     |  2 +-
 tests/test_function.py                        | 10 +--
 tests/test_general_modules.py                 |  8 +-
 tests/test_quick_start.py                     | 79 ++++++++---------
 tests/test_recurrent_modules.py               | 46 +++++-----
 tests/test_transformer.py                     |  4 +-
 tests/test_xor_generalization.py              | 34 +++----
 28 files changed, 347 insertions(+), 378 deletions(-)

diff --git a/benchmarks/pyneuralogic_benchmark.py b/benchmarks/pyneuralogic_benchmark.py
index 706ed651..3e0de113 100644
--- a/benchmarks/pyneuralogic_benchmark.py
+++ b/benchmarks/pyneuralogic_benchmark.py
@@ -167,7 +167,7 @@ def evaluate(model, dataset, steps, dataset_loc, dim, task: Task):
     dataset.number_of_classes = task.output_size
     dataset.one_hot_encoding = True

-    built_dataset = model.build_dataset(dataset, file_mode=True)
+    built_dataset = model.build_dataset(dataset)
     build_time = time.perf_counter() - start_time

     start_time = time.perf_counter()
diff --git a/examples/datasets/horses.py b/examples/datasets/horses.py
index d01a93fe..b1ba94b1 100644
--- a/examples/datasets/horses.py
+++ b/examples/datasets/horses.py
@@ -9,26 +9,20 @@

 template.add_rules(
     [
-        Relation.foal(Var.X)[1,] <= (Relation.parent(Var.X, Var.Y), Relation.horse(Var.Y)), # todo gusta: mozna prejmenovat Atom -> Predicate by odpovidalo skutecnosti prirozeneji?
-        Relation.foal(Var.X)[1,] <= (Relation.sibling(Var.X, Var.Y), Relation.horse(Var.Y)),
-        Relation.negFoal(Var.X)[1,] <= Relation.foal(Var.X),
+        Relation.foal(Var.X)[1, ] <= (Relation.parent(Var.X, Var.Y), Relation.horse(Var.Y)), # todo gusta: mozna prejmenovat Atom -> Predicate by odpovidalo skutecnosti prirozeneji?
+        Relation.foal(Var.X)[1, ] <= (Relation.sibling(Var.X, Var.Y), Relation.horse(Var.Y)),
+        Relation.negFoal(Var.X)[1, ] <= Relation.foal(Var.X),
     ]
 )

-dataset.add_example(
-    [
-        Relation.horse(Constant.aida)[1.0],
-        Relation.horse(Constant.cheyenne)[1.0],
-        Relation.horse(Constant.dakotta)[1.0],
-        Relation.parent(Constant.star, Constant.cheyenne)[1.0],
-        Relation.parent(Constant.star, Constant.aida)[1.0],
-        Relation.parent(Constant.star, Constant.dakotta)[1.0],
-    ]
-)
+example = [
+    Relation.horse(Constant.aida)[1.0],
+    Relation.horse(Constant.cheyenne)[1.0],
+    Relation.horse(Constant.dakotta)[1.0],
+    Relation.parent(Constant.star, Constant.cheyenne)[1.0],
+    Relation.parent(Constant.star, Constant.aida)[1.0],
+    Relation.parent(Constant.star, Constant.dakotta)[1.0],
+]

-dataset.add_queries(
-    [
-        Relation.foal(Constant.star)[1.0],
-        Relation.negFoal(Constant.star)[0.0],
-    ]
-)
+dataset.add(Relation.foal(Constant.star)[1.0], example)
+dataset.add(Relation.negFoal(Constant.star)[0.0], example)
diff --git a/examples/datasets/multiple_examples_no_order_trains.py b/examples/datasets/multiple_examples_no_order_trains.py
index 505a4e20..6d13f5f5 100644
--- a/examples/datasets/multiple_examples_no_order_trains.py
+++ b/examples/datasets/multiple_examples_no_order_trains.py
@@ -18,20 +18,20 @@
 loadshapes = [Constant.hexagon, Constant.triangle, Constant.diamond, Constant.rectangle, Constant.circle]
 vagon_atoms = [Relation.shape, Relation.length, Relation.sides, Relation.wheels, Relation.loadnum, Relation.loadshape, Relation.roof]

-Y = Var.Y #todo gusta: tohle je dobry trik, ten bych pouzival na vic mistech, a podobne pro Atom/Predicate factories udelat zkratky (treba P.)
+Y = Var.Y # todo gusta: tohle je dobry trik, ten bych pouzival na vic mistech, a podobne pro Atom/Predicate factories udelat zkratky (treba P.)

 template.add_rules(
     [
-        *[Relation.shape(Y) <= Relation.shape(Y, s)[1,] for s in shapes],
-        *[Relation.length(Y) <= Relation.length(Y, s)[1,] for s in [Constant.short, Constant.long]],
-        *[Relation.sides(Y) <= Relation.sides(Y, s)[1,] for s in [Constant.not_double, Constant.double]],
-        *[Relation.roof(Y) <= Relation.roof(Y, s)[1,] for s in roofs],
-        *[Relation.wheels(Y) <= Relation.wheels(Y, s)[1,] for s in [2, 3]],
-        *[Relation.loadnum(Y) <= Relation.loadnum(Y, s)[1,] for s in [0, 1, 2, 3]],
-        *[Relation.loadshape(Y) <= Relation.loadshape(Y, s)[1,] for s in loadshapes],
-        Relation.vagon(Y) <= (atom(Y)[1,] for atom in vagon_atoms),
-        Relation.train <= Relation.vagon(Y)[1,],
-        Relation.direction <= Relation.train[1,],
+        *[Relation.shape(Y) <= Relation.shape(Y, s)[1, ] for s in shapes],
+        *[Relation.length(Y) <= Relation.length(Y, s)[1, ] for s in [Constant.short, Constant.long]],
+        *[Relation.sides(Y) <= Relation.sides(Y, s)[1, ] for s in [Constant.not_double, Constant.double]],
+        *[Relation.roof(Y) <= Relation.roof(Y, s)[1, ] for s in roofs],
+        *[Relation.wheels(Y) <= Relation.wheels(Y, s)[1, ] for s in [2, 3]],
+        *[Relation.loadnum(Y) <= Relation.loadnum(Y, s)[1, ] for s in [0, 1, 2, 3]],
+        *[Relation.loadshape(Y) <= Relation.loadshape(Y, s)[1, ] for s in loadshapes],
+        Relation.vagon(Y) <= (atom(Y)[1, ] for atom in vagon_atoms),
+        Relation.train <= Relation.vagon(Y)[1, ],
+        Relation.direction <= Relation.train[1, ],
     ]
 )

@@ -52,5 +52,8 @@
     ]
 )

-dataset.add_examples(examples)
-dataset.add_queries([*[Relation.direction[1.0] for _ in range(1, 11)], *[Relation.direction[-1.0] for _ in range(11, 21)]])
+for example in examples[:10]:
+    dataset.add(Relation.direction[1.0], example)
+
+for example in examples[10:]:
+    dataset.add(Relation.direction[-1.0], example)
diff --git a/examples/datasets/multiple_examples_trains.py b/examples/datasets/multiple_examples_trains.py
index 3fd8a75c..678aae89 100644
--- a/examples/datasets/multiple_examples_trains.py
+++ b/examples/datasets/multiple_examples_trains.py
@@ -21,16 +21,16 @@

 template.add_rules(
     [
-        *[Relation.shape(Y) <= Relation.shape(Y, s)[1,] for s in shapes],
-        *[Relation.length(Y) <= Relation.length(Y, s)[1,] for s in [Constant.short, Constant.long]],
-        *[Relation.sides(Y) <= Relation.sides(Y, s)[1,] for s in [Constant.not_double, Constant.double]],
-        *[Relation.roof(Y) <= Relation.roof(Y, s)[1,] for s in roofs],
-        *[Relation.wheels(Y) <= Relation.wheels(Y, s)[1,] for s in [2, 3]],
-        *[Relation.loadnum(Y) <= Relation.loadnum(Y, s)[1,] for s in [0, 1, 2, 3]],
-        *[Relation.loadshape(Y) <= Relation.loadshape(Y, s)[1,] for s in loadshapes],
-        Relation.vagon(Y) <= (atom(Y)[1,] for atom in vagon_atoms),
-        *[Relation.train <= Relation.vagon(i)[1,] for i in [1, 2, 3, 4]],
-        Relation.direction <= Relation.train[1,],
+        *[Relation.shape(Y) <= Relation.shape(Y, s)[1, ] for s in shapes],
+        *[Relation.length(Y) <= Relation.length(Y, s)[1, ] for s in [Constant.short, Constant.long]],
+        *[Relation.sides(Y) <= Relation.sides(Y, s)[1, ] for s in [Constant.not_double, Constant.double]],
+        *[Relation.roof(Y) <= Relation.roof(Y, s)[1, ] for s in roofs],
+        *[Relation.wheels(Y) <= Relation.wheels(Y, s)[1, ] for s in [2, 3]],
+        *[Relation.loadnum(Y) <= Relation.loadnum(Y, s)[1, ] for s in [0, 1, 2, 3]],
+        *[Relation.loadshape(Y) <= Relation.loadshape(Y, s)[1, ] for s in loadshapes],
+        Relation.vagon(Y) <= (atom(Y)[1, ] for atom in vagon_atoms),
+        *[Relation.train <= Relation.vagon(i)[1, ] for i in [1, 2, 3, 4]],
+        Relation.direction <= Relation.train[1, ],
     ]
 )

@@ -50,5 +50,9 @@
     ]
 )

-dataset.add_examples(examples)
-dataset.add_queries([*[Relation.direction[1.0] for _ in range(1, 11)], *[Relation.direction[-1.0] for _ in range(11, 21)]])
+
+for example in examples[:10]:
+    dataset.add(Relation.direction[1.0], example)
+
+for example in examples[10:]:
+    dataset.add(Relation.direction[-1.0], example)
diff --git a/examples/datasets/naive_trains.py b/examples/datasets/naive_trains.py
index 1730917f..7e01a696 100644
--- a/examples/datasets/naive_trains.py
+++ b/examples/datasets/naive_trains.py
@@ -21,35 +21,35 @@

 template.add_rules(
     [
-        *[Relation.shape(X, Y) <= Relation.shape(X, Y, s)[1,] for s in shapes],
-        *[Relation.length(X, Y) <= Relation.length(X, Y, s)[1,] for s in [Constant.short, Constant.long]],
-        *[Relation.sides(X, Y) <= Relation.sides(X, Y, s)[1,] for s in [Constant.not_double, Constant.double]],
-        *[Relation.roof(X, Y) <= Relation.roof(X, Y, s)[1,] for s in roofs],
-        *[Relation.wheels(X, Y) <= Relation.wheels(X, Y, s)[1,] for s in [2, 3]],
-        *[Relation.loadnum(X, Y) <= Relation.loadnum(X, Y, s)[1,] for s in [0, 1, 2, 3]],
-        *[Relation.loadshape(X, Y) <= Relation.loadshape(X, Y, s)[1,] for s in loadshapes],
-        Relation.vagon(X, Y) <= (atom(X, Y)[1,] for atom in vagon_atoms),
-        *[Relation.train(X) <= Relation.vagon(X, i)[1,] for i in [1, 2, 3, 4]],
-        Relation.direction(X) <= Relation.train(X)[1,],
+        *[Relation.shape(X, Y) <= Relation.shape(X, Y, s)[1, ] for s in shapes],
+        *[Relation.length(X, Y) <= Relation.length(X, Y, s)[1, ] for s in [Constant.short, Constant.long]],
+        *[Relation.sides(X, Y) <= Relation.sides(X, Y, s)[1, ] for s in [Constant.not_double, Constant.double]],
+        *[Relation.roof(X, Y) <= Relation.roof(X, Y, s)[1, ] for s in roofs],
+        *[Relation.wheels(X, Y) <= Relation.wheels(X, Y, s)[1, ] for s in [2, 3]],
+        *[Relation.loadnum(X, Y) <= Relation.loadnum(X, Y, s)[1, ] for s in [0, 1, 2, 3]],
+        *[Relation.loadshape(X, Y) <= Relation.loadshape(X, Y, s)[1, ] for s in loadshapes],
+        Relation.vagon(X, Y) <= (atom(X, Y)[1, ] for atom in vagon_atoms),
+        *[Relation.train(X) <= Relation.vagon(X, i)[1, ] for i in [1, 2, 3, 4]],
+        Relation.direction(X) <= Relation.train(X)[1, ],
     ]
 )

-dataset.add_example(
-    [
-        atom
-        for _, id, pos, shape, length, sides, roof, wheels, load, loadnum in train_example_data
-        for atom in [
-            Relation.shape(id, pos, shape),
-            Relation.length(id, pos, length),
-            Relation.sides(id, pos, sides),
-            Relation.roof(id, pos, roof),
-            Relation.wheels(id, pos, wheels),
-            Relation.loadshape(id, pos, load),
-            Relation.loadnum(id, pos, loadnum),
-        ]
+example = [
+    atom
+    for _, id, pos, shape, length, sides, roof, wheels, load, loadnum in train_example_data
+    for atom in [
+        Relation.shape(id, pos, shape),
+        Relation.length(id, pos, length),
+        Relation.sides(id, pos, sides),
+        Relation.roof(id, pos, roof),
+        Relation.wheels(id, pos, wheels),
+        Relation.loadshape(id, pos, load),
+        Relation.loadnum(id, pos, loadnum),
     ]
-)
+]

-dataset.add_queries(
-    [*[Relation.direction(i)[1.0] for i in range(1, 11)], *[Relation.direction(i)[-1.0] for i in range(11, 21)]]
-)
+for i in range(1, 11):
+    dataset.add(Relation.direction(i)[1.0], example)
+
+for i in range(11, 21):
+    dataset.add(Relation.direction(i)[-1.0], example)
diff --git a/examples/datasets/naive_xor.py b/examples/datasets/naive_xor.py
index 6e3ea049..a6001cc1 100644
--- a/examples/datasets/naive_xor.py
+++ b/examples/datasets/naive_xor.py
@@ -1,5 +1,5 @@
 from neuralogic.core import Relation, Template
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample


 dataset = Dataset()
@@ -8,16 +8,16 @@

 # fmt: off
 # hidden<1-8> :- {1} a, {1} b.
-template.add_rules([Relation.get(f"hidden{i}") <= (Relation.a[1,], Relation.b[1,]) for i in range(1, 9)])
+template.add_rules([Relation.get(f"hidden{i}") <= (Relation.a[1, ], Relation.b[1, ]) for i in range(1, 9)])
 # {1} xor :- hidden<1-8>.
-template.add_rules([Relation.xor[1,] <= Relation.get(f"hidden{i}") for i in range(1, 9)])
+template.add_rules([Relation.xor[1, ] <= Relation.get(f"hidden{i}") for i in range(1, 9)])

-dataset.add_examples(
+dataset.add_samples(
     [  # Add 4 examples
-        Relation.xor[0] <= (Relation.a[0], Relation.b[0]),
-        Relation.xor[1] <= (Relation.a[1], Relation.b[0]),
-        Relation.xor[1] <= (Relation.a[0], Relation.b[1]),
-        Relation.xor[0] <= (Relation.a[1], Relation.b[1]),
+        Sample(Relation.xor[0], [Relation.a[0], Relation.b[0]]),
+        Sample(Relation.xor[1], [Relation.a[1], Relation.b[0]]),
+        Sample(Relation.xor[1], [Relation.a[0], Relation.b[1]]),
+        Sample(Relation.xor[0], [Relation.a[1], Relation.b[1]]),
     ]
 )
diff --git a/examples/datasets/vectorized_xor.py b/examples/datasets/vectorized_xor.py
index 33b5c3e9..f26986c4 100644
--- a/examples/datasets/vectorized_xor.py
+++ b/examples/datasets/vectorized_xor.py
@@ -1,5 +1,5 @@
 from neuralogic.core import Relation, Template
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample


 dataset = Dataset()
@@ -9,11 +9,11 @@

 template.add_rule(Relation.xor[1, 8] <= Relation.xy[8, 2])  # Add template rule

-dataset.add_examples(
+dataset.add_samples(
     [  # Add 4 examples
-        Relation.xor[0] <= Relation.xy[[0, 0]],
-        Relation.xor[1] <= Relation.xy[[0, 1]],
-        Relation.xor[1] <= Relation.xy[[1, 0]],
-        Relation.xor[0] <= Relation.xy[[1, 1]],
+        Sample(Relation.xor[0], Relation.xy[[0, 0]]),
+        Sample(Relation.xor[1], Relation.xy[[0, 1]]),
+        Sample(Relation.xor[1], Relation.xy[[1, 0]]),
+        Sample(Relation.xor[0], Relation.xy[[1, 1]]),
     ]
 )
diff --git a/neuralogic/core/builder/__init__.py b/neuralogic/core/builder/__init__.py
index 564d5237..058a320f 100644
--- a/neuralogic/core/builder/__init__.py
+++ b/neuralogic/core/builder/__init__.py
@@ -1,5 +1,5 @@
 from neuralogic.core.builder.builder import Builder
 from neuralogic.core.builder.dataset_builder import DatasetBuilder
-from neuralogic.core.builder.components import Neuron, Weight, Sample, BuiltDataset, GroundedDataset
+from neuralogic.core.builder.components import Neuron, Weight, BuiltDataset, GroundedDataset

-__all__ = ["Builder", "DatasetBuilder", "Neuron", "Weight", "Sample", "BuiltDataset", "GroundedDataset"]
+__all__ = ["Builder", "DatasetBuilder", "Neuron", "Weight", "BuiltDataset", "GroundedDataset"]
diff --git a/neuralogic/core/builder/builder.py b/neuralogic/core/builder/builder.py
index f0e7e28c..718a9391 100644
--- a/neuralogic/core/builder/builder.py
+++ b/neuralogic/core/builder/builder.py
@@ -4,7 +4,7 @@
 from tqdm.autonotebook import tqdm

 from neuralogic import is_initialized, initialize
-from neuralogic.core.builder.components import Sample, RawSample
+from neuralogic.core.builder.components import NeuralSample
 from neuralogic.core.settings import SettingsProxy
 from neuralogic.core.sources import Sources

@@ -58,7 +58,7 @@ def ground_from_sources(self, parsed_template, sources: Sources):
     def ground_from_logic_samples(self, parsed_template, logic_samples):
         return self._ground(parsed_template, None, logic_samples)

-    def _ground(self, parsed_template, sources: Optional[Sources], logic_samples) -> List[RawSample]:
+    def _ground(self, parsed_template, sources: Optional[Sources], logic_samples) -> List[NeuralSample]:
         if sources is not None:
             ground_pipeline = self.example_builder.buildGroundings(parsed_template, sources.sources)
         else:
@@ -69,20 +69,20 @@ def _ground(self, parsed_template, sources: Optional[Sources], logic_samples) ->

         return ground_pipeline.get()

-    def neuralize(self, groundings, progress: bool, length: Optional[int]) -> List[RawSample]:
+    def neuralize(self, groundings, progress: bool, length: Optional[int]) -> List[NeuralSample]:
         if not progress:
             return self._neuralize(groundings, None)

         with tqdm(total=length, desc="Building", unit=" samples", dynamic_ncols=True) as pbar:
             return self._neuralize(groundings, self._callback(pbar))

-    def _neuralize(self, groundings, callback) -> List[RawSample]:
+    def _neuralize(self, groundings, callback) -> List[NeuralSample]:
         neuralize_pipeline = self.example_builder.neuralize(groundings, None)
         neuralize_pipeline.execute(None)

         samples = neuralize_pipeline.get()
         logic_samples = samples.collect(self.collectors.toList())

-        return [RawSample(sample, None) for sample in logic_samples]
+        return [NeuralSample(sample, None) for sample in logic_samples]

     def build_model(self, parsed_template, settings: SettingsProxy):
         neural_model = self.neural_model(parsed_template.getAllWeights(), settings.settings)
@@ -95,14 +95,6 @@ def get_builders(settings: SettingsProxy):

         return builder

-    @staticmethod
-    def build(samples):
-        serializer = jpype.JClass("cz.cvut.fel.ida.neural.networks.structure.export.NeuralSerializer")()
-        super_detailed_format = jpype.JClass("cz.cvut.fel.ida.setup.Settings").superDetailedNumberFormat
-        serializer.numberFormat = super_detailed_format
-
-        return [Sample(serializer.serialize(sample), sample) for sample in samples]
-
     @staticmethod
     def _get_spinner_text(count: int) -> str:
         if count == 1:
diff --git a/neuralogic/core/builder/components.py b/neuralogic/core/builder/components.py
index 157168a1..9624eff1 100644
--- a/neuralogic/core/builder/components.py
+++ b/neuralogic/core/builder/components.py
@@ -8,7 +8,7 @@
 from neuralogic.utils.visualize import draw_sample, draw_grounding


-class RawSample:
+class NeuralSample:
     __slots__ = "java_sample", "fact_cache", "grounding"

     def __init__(self, sample, grounding):
@@ -70,30 +70,6 @@ def draw(
         return draw_sample(self, filename, show, img_type, value_detail, graphviz_path, *args, **kwargs)


-class Sample(RawSample):
-    __slots__ = ("id", "target", "neurons", "output_neuron", "java_sample")
-
-    def __init__(self, sample, java_sample):
-        super().__init__(sample, None)
-        serialized_sample = json.loads(str(sample.exportToJson()))
-
-        self.id = serialized_sample["id"]
-        self.target = json.loads(serialized_sample["target"])
-        self.neurons = Sample.deserialize_network(serialized_sample["network"])
-        self.output_neuron = self.neurons[-1].index
-        self.java_sample = java_sample
-
-    @staticmethod
-    def deserialize_network(network):
-        neurons = []
-
-        for i, neuron in enumerate(network):
-            neuron_object = Neuron(neuron, i)
-            neurons.append(neuron_object)
-
-        return neurons
-
-
 class Neuron:
     def __init__(self, neuron: Dict[str, Any], index):
         self.index = index
@@ -159,7 +135,7 @@ class BuiltDataset:

     __slots__ = "samples", "batch_size"

-    def __init__(self, samples: List[RawSample], batch_size: int):
+    def __init__(self, samples: List[NeuralSample], batch_size: int):
         self.samples = samples
         self.batch_size = batch_size
diff --git a/neuralogic/core/builder/dataset_builder.py b/neuralogic/core/builder/dataset_builder.py
index b86a598d..9bc9a659 100644
--- a/neuralogic/core/builder/dataset_builder.py
+++ b/neuralogic/core/builder/dataset_builder.py
@@ -1,5 +1,4 @@
-import tempfile
-from typing import Union, Set, Dict
+from typing import Union, Set, Dict, List

 import jpype

@@ -13,7 +12,6 @@
 from neuralogic.core.settings import SettingsProxy
 from neuralogic.core.sources import Sources

-
 TemplateEntries = Union[BaseRelation, WeightedRelation, Rule]


@@ -74,6 +72,9 @@ def build_examples(self, examples, examples_builder, learnable_facts=False):
         examples_queries = False

         for example in examples:
+            if example is None:
+                example = []
+
             label, lifted_example = self.java_factory.get_lifted_example(example, learnable_facts)

             example_query = False
@@ -110,7 +111,6 @@ def ground_dataset(
         settings: SettingsProxy,
         *,
         batch_size: int = 1,
-        file_mode: bool = False,
         learnable_facts: bool = False,
     ) -> GroundedDataset:
         """Grounds the dataset

         :param dataset:
         :param settings:
         :param batch_size:
-        :param file_mode:
         :param learnable_facts:
         :return:
         """
-        if isinstance(dataset, datasets.TensorDataset) and file_mode:
-            with tempfile.NamedTemporaryFile(mode="w", suffix=".txt") as q_tf, tempfile.NamedTemporaryFile(
-                mode="w", suffix=".txt"
-            ) as e_tf:
-                dataset.dump(q_tf, e_tf)
-
-                q_tf.flush()
-                e_tf.flush()
-
-                return self.ground_dataset(
-                    datasets.FileDataset(e_tf.name, q_tf.name),
-                    settings,
-                    batch_size=batch_size,
-                    file_mode=False,
-                    learnable_facts=learnable_facts,
-                )
-
-        if isinstance(dataset, datasets.ConvertableDataset):
+        if isinstance(dataset, datasets.ConvertibleDataset):
             return self.ground_dataset(
                 dataset.to_dataset(),
                 settings,
                 batch_size=batch_size,
-                file_mode=False,
                 learnable_facts=learnable_facts,
             )
@@ -166,9 +147,7 @@ def ground_dataset(
         query_builder.setFactoriesFrom(examples_builder)

         settings.settings.groundingMode = self.grounding_mode.INDEPENDENT
-
-        examples = dataset.examples
-        queries = dataset.queries
+        examples, queries = self.samples_to_examples_and_queries(dataset.samples)

         if len(examples) == 1:
             settings.settings.groundingMode = self.grounding_mode.GLOBAL
@@ -209,7 +188,6 @@ def build_dataset(
         settings: SettingsProxy,
         *,
         batch_size: int = 1,
-        file_mode: bool = False,
         learnable_facts: bool = False,
         progress: bool = False,
     ) -> BuiltDataset:
@@ -218,7 +196,6 @@ def build_dataset(
         :param dataset:
         :param settings:
         :param batch_size:
-        :param file_mode:
         :param learnable_facts:
         :param progress:
         :return:
@@ -227,7 +204,7 @@ def build_dataset(

         if not isinstance(dataset, GroundedDataset):
             grounded_dataset = self.ground_dataset(
-                dataset, settings, batch_size=batch_size, file_mode=file_mode, learnable_facts=learnable_facts
+                dataset, settings, batch_size=batch_size, learnable_facts=learnable_facts
             )
             return BuiltDataset(grounded_dataset.neuralize(progress), batch_size)
@@ -279,3 +256,18 @@ def merge_queries_with_examples(queries, examples, one_query_per_example, exampl
             query_object.query.evidence = example_object.query.evidence
             logic_samples.append(query)
         return logic_samples
+
+    @staticmethod
+    def samples_to_examples_and_queries(samples: List):
+        example_dict = {}
+        queries_dict = {}
+
+        for sample in samples:
+            idx = id(sample.example)
+
+            if idx not in example_dict:
+                queries_dict[idx] = [sample.query]
+                example_dict[idx] = sample.example
+            else:
+                queries_dict[idx].append(sample.query)
+        return example_dict.values(), queries_dict.values()
diff --git a/neuralogic/dataset/__init__.py b/neuralogic/dataset/__init__.py
index 5f12ebc4..18dd7dee 100644
--- a/neuralogic/dataset/__init__.py
+++ b/neuralogic/dataset/__init__.py
@@ -1,13 +1,14 @@
-from neuralogic.dataset.base import BaseDataset, ConvertableDataset
+from neuralogic.dataset.base import BaseDataset, ConvertibleDataset
 from neuralogic.dataset.file import FileDataset
-from neuralogic.dataset.logic import Dataset
+from neuralogic.dataset.logic import Dataset, Sample
 from neuralogic.dataset.tensor import TensorDataset, Data
 from neuralogic.dataset.csv import Mode, CSVFile, CSVDataset
 from neuralogic.dataset.db import DBDataset, DBSource

 __all__ = [
     "BaseDataset",
-    "ConvertableDataset",
+    "ConvertibleDataset",
+    "Sample",
     "FileDataset",
     "Dataset",
     "TensorDataset",
diff --git a/neuralogic/dataset/base.py b/neuralogic/dataset/base.py
index e20ee653..78214039 100644
--- a/neuralogic/dataset/base.py
+++ b/neuralogic/dataset/base.py
@@ -7,16 +7,7 @@ def dump(
     ):
         raise NotImplementedError

-    def dump_to_file(
-        self,
-        queries_filename: str,
-        examples_filename: str,
-        sep: str = "\n",
-    ):
-        with open(queries_filename, "w") as queries_fp, open(examples_filename, "w") as examples_fp:
-            self.dump(queries_fp, examples_fp, sep)
-

-class ConvertableDataset(BaseDataset):
+class ConvertibleDataset(BaseDataset):
     def to_dataset(self):
         raise NotImplementedError
diff --git a/neuralogic/dataset/csv.py b/neuralogic/dataset/csv.py
index e41b592b..04b8ed76 100644
--- a/neuralogic/dataset/csv.py
+++ b/neuralogic/dataset/csv.py
@@ -3,12 +3,12 @@
 from typing import Optional, List, Union, TextIO, Callable, Sequence

 from neuralogic.core.constructs.factories import R
-from neuralogic.core.constructs.relation import BaseRelation, WeightedRelation
+from neuralogic.core.constructs.relation import BaseRelation
 from neuralogic.core.constructs.rule import Rule
-from neuralogic.dataset import Dataset
-from neuralogic.dataset.base import ConvertableDataset
+from neuralogic.dataset import Dataset, Sample
+from neuralogic.dataset.base import ConvertibleDataset

-DatasetEntries = Union[BaseRelation, WeightedRelation, Rule]
+DatasetEntries = Union[BaseRelation, Rule]


 class Mode(enum.Enum):
@@ -138,7 +138,7 @@ def to_logic_form(self) -> Sequence[DatasetEntries]:
         return self._to_logic(self.csv_source)


-class CSVDataset(ConvertableDataset):
+class CSVDataset(ConvertibleDataset):
     def __init__(
         self,
         csv_files: Union[List[CSVFile], CSVFile],
@@ -156,22 +156,23 @@ def set_query_csv_file(self, file: CSVFile):
         self.csv_queries = file

     def to_dataset(self) -> Dataset:
-        examples: List[List[DatasetEntries]] = []
         queries = self.csv_queries.to_logic_form() if self.csv_queries else []

-        dataset = Dataset(examples, queries)
-
         if self.mode == Mode.ONE_EXAMPLE:
             example: List[DatasetEntries] = []

             for source in self.csv_files:
                 example.extend(source.to_logic_form())
-            examples.append(example)
+
+            if not queries:
+                return Dataset([Sample(None, example)])
+            return Dataset([Sample(q, example) for q in queries])
         elif self.mode == Mode.ZIP:
             logic_examples = [source.to_logic_form() for source in self.csv_files]

-            for zipped_example in zip(*logic_examples):
-                examples.append(zipped_example)
+            if not queries:
+                return Dataset([Sample(None, zipped_example) for zipped_example in zip(*logic_examples)])
+            return Dataset([Sample(q, zipped_example) for q, zipped_example in zip(queries, zip(*logic_examples))])
         elif self.mode == Mode.EXAMPLE_PER_SOURCE:
-            for source in self.csv_files:
-                examples.append(source.to_logic_form())
-        return dataset
+            if not queries:
+                return Dataset([Sample(None, source.to_logic_form()) for source in self.csv_files])
+            return Dataset([Sample(q, source.to_logic_form()) for source, q in zip(self.csv_files, queries)])
+        raise NotImplementedError
diff --git a/neuralogic/dataset/db.py b/neuralogic/dataset/db.py
index 5c0bbf00..26ad1943 100644
--- a/neuralogic/dataset/db.py
+++ b/neuralogic/dataset/db.py
@@ -6,7 +6,7 @@
 from neuralogic.core.constructs.rule import Rule
 from neuralogic.dataset.logic import Dataset
 from neuralogic.dataset.csv import CSVDataset, CSVFile, Mode
-from neuralogic.dataset.base import ConvertableDataset
+from neuralogic.dataset.base import ConvertibleDataset

 DatasetEntries = Union[BaseRelation, WeightedRelation, Rule]

@@ -89,7 +89,7 @@ def to_csv(self, cursor) -> CSVFile:
         )


-class DBDataset(ConvertableDataset):
+class DBDataset(ConvertibleDataset):
     def __init__(
         self,
         connection,
diff --git a/neuralogic/dataset/logic.py b/neuralogic/dataset/logic.py
index 90a55e2c..c70ea3e5 100644
--- a/neuralogic/dataset/logic.py
+++ b/neuralogic/dataset/logic.py
@@ -1,10 +1,38 @@
-from typing import Optional, List, Union
+from typing import Optional, List, Union, Sequence

-from neuralogic.core.constructs.relation import BaseRelation, WeightedRelation
+from neuralogic.core.constructs.relation import BaseRelation
 from neuralogic.core.constructs.rule import Rule
 from neuralogic.dataset.base import BaseDataset

-DatasetEntries = Union[BaseRelation, WeightedRelation, Rule]
+DatasetEntries = Union[BaseRelation, Rule]
+
+
+class Sample:
+    __slots__ = (
+        "query",
+        "example",
+    )
+
+    def __init__(
+        self, query: Optional[BaseRelation], example: Optional[Union[Sequence[DatasetEntries], DatasetEntries]]
+    ):
+        self.query = query
+
+        if example is None:
+            example = []
+
+        if not isinstance(example, Sequence):
+            self.example = [example]
+        else:
+            self.example = example
+
+    def __str__(self) -> str:
+        return str(self.query)
+
+    def __len__(self) -> int:
+        if self.example is None:
+            return 0
+        return len(self.example)


 class Dataset(BaseDataset):
@@ -68,40 +96,36 @@ class Dataset(BaseDataset):

     """

-    def __init__(
-        self,
-        examples: Optional[List[List[DatasetEntries]]] = None,
-        queries: Optional[List[Union[List[DatasetEntries], DatasetEntries]]] = None,
-    ):
-        self.examples: List[List[DatasetEntries]] = examples if examples is not None else []
-        self.queries: List[Union[List[DatasetEntries], DatasetEntries]] = queries if queries is not None else []
+    __slots__ = ("samples",)

-    def add_example(self, example):
-        self.add_examples([example])
+    def __init__(self, samples: Optional[Union[List[Sample], Sample]] = None):
+        self.samples = samples

-    def add_examples(self, examples: List):
-        self.examples.extend(examples)
+        if self.samples is None:
+            self.samples = []
+        elif not isinstance(self.samples, list):
+            self.samples = [self.samples]

-    def set_examples(self, examples: List):
-        self.examples = examples
+    def add_samples(self, samples: List[Sample]):
+        self.samples.extend(samples)

-    def add_query(self, query):
-        self.add_queries([query])
+    def add_sample(self, sample: Sample):
+        self.samples.append(sample)

-    def add_queries(self, queries: List):
-        self.queries.extend(queries)
+    def add(self, query: BaseRelation, example: Optional[List[DatasetEntries]]):
+        self.samples.append(Sample(query, example))

-    def set_queries(self, queries: List):
-        self.queries = queries
+    def __getitem__(self, item: int) -> Sample:
+        return self.samples[item]

-    def dump(
-        self,
-        queries_fp,
-        examples_fp,
-        sep: str = "\n",
-    ):
-        for examples in self.examples:
-            examples_fp.write(f"{','.join(example.to_str(False) for example in examples)}.{sep}")
+    def __setitem__(self, key: int, value: Sample):
+        self.samples[key] = value
+
+    def __delitem__(self, key: int):
+        del self.samples[key]
+
+    def __str__(self):
+        return ". ".join(str(s) for s in self.samples)

-        for query in self.queries:
-            queries_fp.write(f"{query}{sep}")
+    def __len__(self):
+        return len(self.samples)
diff --git a/neuralogic/dataset/tensor.py b/neuralogic/dataset/tensor.py
index 87c4079d..cf9443ee 100644
--- a/neuralogic/dataset/tensor.py
+++ b/neuralogic/dataset/tensor.py
@@ -3,7 +3,7 @@
 import numpy as np

 from neuralogic.core.constructs.factories import Relation
-from neuralogic.dataset.base import ConvertableDataset
+from neuralogic.dataset.base import ConvertibleDataset
 from neuralogic.dataset.logic import Dataset


@@ -181,7 +181,7 @@ def from_pyg(data) -> List["Data"]:
         return data_list


-class TensorDataset(ConvertableDataset):
+class TensorDataset(ConvertibleDataset):
     r"""The ``TensorDataset`` holds a list of :py:class:`~neuralogic.dataset.tensor.Data` instances - a list of graphs
     represented in a tensor format.

@@ -256,8 +256,11 @@ def to_dataset(self) -> Dataset:
                 self.number_of_classes,
             )

-            dataset.add_query(query)
-            dataset.add_example(examples)
+            if isinstance(query, Sequence):
+                for q in query:
+                    dataset.add(q, examples)
+            else:
+                dataset.add(query, examples)
         return dataset

     def dump(
diff --git a/neuralogic/inference/evaluation_inference_engine.py b/neuralogic/inference/evaluation_inference_engine.py
index 2d1583a9..1d8f1959 100644
--- a/neuralogic/inference/evaluation_inference_engine.py
+++ b/neuralogic/inference/evaluation_inference_engine.py
@@ -4,41 +4,38 @@

 from neuralogic.core.constructs.relation import BaseRelation
 from neuralogic.core.constructs.rule import Rule
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample


 class EvaluationInferenceEngine:
     def __init__(self, template: Template, settings: Settings = None):
         self.settings = Settings() if settings is None else settings
         self.model = template.build(self.settings)
-
-        self.examples: List[Union[BaseRelation, Rule]] = []
-        self.dataset = Dataset()
-        self.dataset.examples = [[]]
+        self.dataset = Dataset(Sample(None, None))

     def set_knowledge(self, examples: List[Union[BaseRelation, Rule]]) -> None:
-        self.dataset.examples = [examples]
+        self.dataset[0].example = examples

     def q(self, query: BaseRelation, examples: Optional[List[Union[BaseRelation, Rule]]] = None):
         return self.query(query, examples)

     def query(self, query: BaseRelation, examples: Optional[List[Union[BaseRelation, Rule]]] = None):
-        global_examples = self.dataset.examples
+        global_examples = self.dataset[0].example

         if examples is not None:
-            self.dataset.examples = [examples]
+            self.dataset[0].example = examples

-        self.dataset.queries = [query]
+        self.dataset[0].query = query
         variables = [(name, index) for index, name in enumerate(query.terms) if str(name).isupper()]

         try:
             built_dataset = self.model.build_dataset(self.dataset)
             results = self.model(built_dataset.samples, train=False)
         except Exception:
-            self.dataset.examples = global_examples
+            self.dataset[0].example = global_examples
             return {}

-        self.dataset.examples = global_examples
+        self.dataset[0].example = global_examples

         if len(built_dataset.samples) != len(results):
             raise Exception
diff --git a/neuralogic/nn/base.py b/neuralogic/nn/base.py
index 1bd34638..ad2326f5 100644
--- a/neuralogic/nn/base.py
+++ b/neuralogic/nn/base.py
@@ -29,14 +29,12 @@ def ground(
         dataset: BaseDataset,
         *,
         batch_size: int = 1,
-        file_mode: bool = False,
         learnable_facts: bool = False,
     ) -> GroundedDataset:
         return self.dataset_builder.ground_dataset(
             dataset,
             self.settings,
             batch_size=batch_size,
-            file_mode=file_mode,
             learnable_facts=learnable_facts,
         )

@@ -45,7 +43,6 @@ def build_dataset(
         dataset: Union[BaseDataset, GroundedDataset],
         *,
         batch_size: int = 1,
-        file_mode: bool = False,
         learnable_facts: bool = False,
         progress: bool = False,
     ) -> BuiltDataset:
@@ -53,7 +50,6 @@ def build_dataset(
             dataset,
             self.settings,
             batch_size=batch_size,
-            file_mode=file_mode,
             learnable_facts=learnable_facts,
             progress=progress,
         )
@@ -128,7 +124,6 @@ def build_dataset(
         dataset: Union[BaseDataset, BuiltDataset],
         *,
         batch_size: int = 1,
-        file_mode: bool = False,
         learnable_facts: bool = False,
         progress: bool = False,
     ):
@@ -136,7 +131,6 @@ def build_dataset(
         return self.neuralogic_model.build_dataset(
             dataset,
             batch_size=batch_size,
-            file_mode=file_mode,
             learnable_facts=learnable_facts,
             progress=progress,
         )
diff --git a/neuralogic/nn/torch_function.py b/neuralogic/nn/torch_function.py
index 9e5d8d59..4af7c59d 100644
--- a/neuralogic/nn/torch_function.py
+++ b/neuralogic/nn/torch_function.py
@@ -9,7 +9,7 @@
 from neuralogic.core.constructs.java_objects import ValueFactory
 from neuralogic.core.constructs.relation import BaseRelation
 from neuralogic.core.constructs.rule import Rule
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample


 class _NeuraLogicFunction(Function):
@@ -76,7 +76,7 @@ def __init__(
         self.model = template.build(settings)
         self.number_format = self.model.settings.settings_class.superDetailedNumberFormat

-        dataset = Dataset([input_facts], [output_relation])
+        dataset = Dataset(Sample(output_relation, input_facts))
         self.sample = self.model.build_dataset(dataset, learnable_facts=True).samples[0]

         self.value_factory = ValueFactory()
diff --git a/tests/test_csv_datasets.py b/tests/test_csv_datasets.py
index e24599e1..b7ad4c60 100644
--- a/tests/test_csv_datasets.py
+++ b/tests/test_csv_datasets.py
@@ -71,49 +71,55 @@ def tests_csv_file() -> None:
     assert [str(e) for e in examples] == expected


-@pytest.mark.parametrize("mode,expected", [
-    (
+@pytest.mark.parametrize(
+    "mode,expected",
+    [
+        (
             Mode.ONE_EXAMPLE,
-            [[
-                "rel_a(1, 2, 3).",
-                "rel_a(4, 5, 6).",
-                "rel_a(7, 8, 9).",
-                "rel_b(1, 2, 3).",
-                "rel_b(4, 5, 6).",
-                "rel_b(7, 8, 9).",
-                "rel_c(1, 2, 3).",
-                "rel_c(4, 5, 6).",
-                "rel_c(7, 8, 9).",
-            ]]
-    ),
-    (
+            [
+                [
+                    "rel_a(1, 2, 3).",
+                    "rel_a(4, 5, 6).",
+                    "rel_a(7, 8, 9).",
+                    "rel_b(1, 2, 3).",
+                    "rel_b(4, 5, 6).",
+                    "rel_b(7, 8, 9).",
+                    "rel_c(1, 2, 3).",
+                    "rel_c(4, 5, 6).",
+                    "rel_c(7, 8, 9).",
+                ]
+            ],
+        ),
+        (
             Mode.ZIP,
             [
-                ["rel_a(1, 2, 3).", "rel_b(1, 2, 3).", "rel_c(1, 2, 3)."],
-                ["rel_a(4, 5, 6).", "rel_b(4, 5, 6).", "rel_c(4, 5, 6)."],
-                ["rel_a(7, 8, 9).", "rel_b(7, 8, 9).", "rel_c(7, 8, 9)."]
-            ]
-    ),
-    (
+                ["rel_a(1, 2, 3).", "rel_b(1, 2, 3).", "rel_c(1, 2, 3)."],
+                ["rel_a(4, 5, 6).", "rel_b(4, 5, 6).", "rel_c(4, 5, 6)."],
+                ["rel_a(7, 8, 9).", "rel_b(7, 8, 9).", "rel_c(7, 8, 9)."],
+            ],
+        ),
+        (
             Mode.EXAMPLE_PER_SOURCE,
             [
-                [
-                    "rel_a(1, 2, 3).",
-                    "rel_a(4, 5, 6).",
-                    "rel_a(7, 8, 9).",
-                ], [
-                    "rel_b(1, 2, 3).",
-                    "rel_b(4, 5, 6).",
-                    "rel_b(7, 8, 9).",
-                ], [
-                    "rel_c(1, 2, 3).",
-                    "rel_c(4, 5, 6).",
-                    "rel_c(7, 8, 9).",
+                [
+                    "rel_a(1, 2, 3).",
+                    "rel_a(4, 5, 6).",
+                    "rel_a(7, 8, 9).",
+                ],
+                [
+                    "rel_b(1, 2, 3).",
+                    "rel_b(4, 5, 6).",
+                    "rel_b(7, 8, 9).",
+                ],
+                [
+                    "rel_c(1, 2, 3).",
+                    "rel_c(4, 5, 6).",
+                    "rel_c(7, 8, 9).",
+                ],
                 ],
-            ]
-    ),
-
-])
+        ),
+    ],
+)
 def test_csv_dataset(mode: Mode, expected: List[List[str]]) -> None:
     csv_string_source = """1,2,3
 4,5,6
 7,8,9
 """
@@ -132,8 +138,8 @@ def test_csv_dataset(mode: Mode, expected: List[List[str]]) -> None:
     dataset = CSVDataset([csv_source_a, csv_source_b, csv_source_c], mode=mode)
     logic_dataset = dataset.to_dataset()

-    assert len(logic_dataset.examples) == len(expected)
+    assert len(logic_dataset) == len(expected)

-    for exp, actual in zip(expected, logic_dataset.examples):
-        assert len(exp) == len(actual)
-        assert exp == [str(e) for e in actual]
+    for exp, sample in zip(expected, logic_dataset.samples):
+        assert len(exp) == len(sample)
+        assert exp == [str(e) for e in sample.example]
diff --git a/tests/test_evaluation_inference_engine.py b/tests/test_evaluation_inference_engine.py
index 74d8b882..c9d93be0 100644
--- a/tests/test_evaluation_inference_engine.py
+++ b/tests/test_evaluation_inference_engine.py
@@ -3,7 +3,7 @@
 from neuralogic.inference.evaluation_inference_engine import EvaluationInferenceEngine


-def test_eval_inference_engine_london_reachable() -> None:
+def test_evaluation_inference_engine_london_reachable() -> None:
     """
     Test of the evaluation inference engine based on
     https://book.simply-logical.space/part_i.html#a_brief_introduction_to_clausal_logic
diff --git a/tests/test_function.py b/tests/test_function.py
index 4f6acd23..afb784c5 100644
--- a/tests/test_function.py
+++ b/tests/test_function.py
@@ -5,7 +5,7 @@
 import neuralogic.nn.functional as F

 from neuralogic.core import Template, R, Settings
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample


 @pytest.mark.parametrize(
@@ -29,7 +29,7 @@ def test_transformation_body_function(torch_fun, fun):
     template += R.h / 0 | [F.identity]

     model = template.build(Settings(iso_value_compression=False, chain_pruning=False))
-    dataset = Dataset([[R.input[data.tolist()]]], [R.h])
+    dataset = Dataset([Sample(R.h, R.input[data.tolist()])])

     built_dataset = model.build_dataset(dataset)

@@ -60,7 +60,7 @@ def test_slice_function():
     template += R.h / 0 | [F.identity]

     model = template.build(Settings(iso_value_compression=False, chain_pruning=False))
-    dataset = Dataset([[R.input[data]]], [R.h])
+    dataset = Dataset([Sample(R.h, [R.input[data]])])

     built_dataset = model.build_dataset(dataset)
     results = np.array(model(built_dataset, train=False)[0])
@@ -72,7 +72,7 @@ def test_slice_function():
     template += R.h / 0 | [F.identity]

     model = template.build(Settings(iso_value_compression=False, chain_pruning=False))
-    dataset = Dataset([[R.input[data]]], [R.h])
+    dataset = Dataset(Sample(R.h, [R.input[data]]))

     built_dataset = model.build_dataset(dataset)
     results = np.array(model(built_dataset, train=False)[0])
@@ -84,7 +84,7 @@ def test_slice_function():
     template += R.h / 0 | [F.slice(rows=(1, 3))]

     model = template.build(Settings(iso_value_compression=False, chain_pruning=False))
-    dataset = Dataset([[R.input[data]]], [R.h])
+    dataset = Dataset(Sample(R.h, [R.input[data]]))

     built_dataset = model.build_dataset(dataset)
     results = np.array(model(built_dataset, train=False)[0])
diff --git a/tests/test_general_modules.py b/tests/test_general_modules.py
index 017e4108..98cf60b0 100644
--- a/tests/test_general_modules.py
+++ b/tests/test_general_modules.py
@@ -4,7 +4,7 @@

 from neuralogic.core import Template, R, Settings, Transformation
 from neuralogic.nn.module import Linear
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample


 @pytest.mark.parametrize(
@@ -34,10 +34,8 @@ def test_linear_module(feature_size: int, output_size: int, num_of_inputs: int,
     state["weights"][0] = list(linear.parameters())[0].detach().numpy()
     model.load_state_dict(state)

-    examples = [R.f(index)[row.detach().numpy()] for index, row in enumerate(linear_input)]
-    queries = [R.h(index) for index, _ in enumerate(linear_input)]
-
-    built_dataset = model.build_dataset(Dataset(examples, queries))
+    samples = [Sample(R.h(index), [R.f(index)[row.detach().numpy()]]) for index, row in enumerate(linear_input)]
+    built_dataset = model.build_dataset(Dataset(samples))

     for sample, row in zip(built_dataset.samples, linear_output):
         results = model(sample, train=False)
diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py
index 1c34eca9..ff440de2 100644
--- a/tests/test_quick_start.py
+++ b/tests/test_quick_start.py
@@ -1,4 +1,4 @@
-from neuralogic.dataset import Data, TensorDataset, Dataset
+from neuralogic.dataset import Data, TensorDataset, Dataset, Sample
 from neuralogic.core import Template, Settings, Relation
 from neuralogic.nn import get_evaluator
 from neuralogic.nn.module import GCNConv
@@ -22,9 +22,8 @@ def test_quick_start_from_tensor():

     logic_dataset = dataset.to_dataset()

-    assert len(logic_dataset.examples) == 1
-    assert len(logic_dataset.queries) == 1
-    assert len(logic_dataset.examples[0]) == 9
+    assert len(logic_dataset) == 3
+    assert len(logic_dataset[0]) == 9

     expected = [
         "<1> edge(0, 1).",
@@ -38,12 +37,12 @@ def test_quick_start_from_tensor():
         "<-1> node_feature(2).",
     ]

-    for a, b in zip(logic_dataset.examples[0], expected):
+    for a, b in zip(logic_dataset[0].example, expected):
         assert str(a) == b

-    assert str(logic_dataset.queries[0][0]) == "1.0 predict(0)."
-    assert str(logic_dataset.queries[0][1]) == "0.0 predict(1)."
-    assert str(logic_dataset.queries[0][2]) == "1.0 predict(2)."
+    assert str(logic_dataset[0]) == "1.0 predict(0)."
+    assert str(logic_dataset[1]) == "0.0 predict(1)."
+    assert str(logic_dataset[2]) == "1.0 predict(2)."


 def test_model_evaluation_from_tensor():
@@ -81,25 +80,23 @@ def test_model_evaluation_from_tensor():
 def test_model_evaluation_from_logic():
     dataset = Dataset()

-    dataset.add_example(
-        [
-            Relation.edge(0, 1),
-            Relation.edge(1, 2),
-            Relation.edge(2, 0),
-            Relation.edge(1, 0),
-            Relation.edge(2, 1),
-            Relation.edge(0, 2),
-            Relation.node_feature(0)[0],
-            Relation.node_feature(1)[1],
-            Relation.node_feature(2)[-1],
-        ]
-    )
+    example = [
+        Relation.edge(0, 1),
+        Relation.edge(1, 2),
+        Relation.edge(2, 0),
+        Relation.edge(1, 0),
+        Relation.edge(2, 1),
+        Relation.edge(0, 2),
+        Relation.node_feature(0)[0],
+        Relation.node_feature(1)[1],
+        Relation.node_feature(2)[-1],
+    ]

-    dataset.add_queries(
+    dataset.add_samples(
         [
-            Relation.predict(0)[1],
-            Relation.predict(1)[0],
-            Relation.predict(2)[1],
+            Sample(Relation.predict(0)[1], example),
+            Sample(Relation.predict(1)[0], example),
+            Sample(Relation.predict(2)[1], example),
         ]
     )
@@ -125,25 +122,23 @@ def test_evaluator_from_logic():
     dataset = Dataset()

-    dataset.add_example(
-        [
-            Relation.edge(0, 1),
-            Relation.edge(1, 2),
-            Relation.edge(2, 0),
-            Relation.edge(1, 0),
-            Relation.edge(2, 1),
-            Relation.edge(0, 2),
-            Relation.node_feature(0)[0],
-            Relation.node_feature(1)[1],
-            Relation.node_feature(2)[-1],
-        ]
-    )
+    example = [
+        Relation.edge(0, 1),
+        Relation.edge(1, 2),
+        Relation.edge(2, 0),
+        Relation.edge(1, 0),
+        Relation.edge(2, 1),
+        Relation.edge(0, 2),
+        Relation.node_feature(0)[0],
+        Relation.node_feature(1)[1],
+        Relation.node_feature(2)[-1],
+    ]

-    dataset.add_queries(
+    dataset.add_samples(
         [
-            Relation.predict(0)[1],
-            Relation.predict(1)[0],
-            Relation.predict(2)[1],
+            Sample(Relation.predict(0)[1], example),
+            Sample(Relation.predict(1)[0], example),
+            Sample(Relation.predict(2)[1], example),
         ]
     )
diff --git a/tests/test_recurrent_modules.py b/tests/test_recurrent_modules.py
index bc5d25b5..92ea1a77 100644
--- a/tests/test_recurrent_modules.py
+++ b/tests/test_recurrent_modules.py
@@ -3,7 +3,7 @@
 import torch

 from neuralogic.core import Template, Settings, R
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample
 from neuralogic.nn.loss import MSE
 from neuralogic.nn.module import GRU, RNN, LSTM

@@ -46,12 +46,14 @@ def test_gru_module(input_size, hidden_size, sequence_len, epochs):

     dataset = Dataset(
         [
-            [
-                R.h0[[float(h) for h in h0[0]]],
-                *[R.f(i + 1)[[float(h) for h in torch_input[i]]] for i in range(sequence_len)],
-            ]
-        ],
-        [R.h(sequence_len)[target.detach().numpy().tolist()]],
+            Sample(
+                R.h(sequence_len)[target.detach().numpy().tolist()],
+                [
+                    R.h0[[float(h) for h in h0[0]]],
+                    *[R.f(i + 1)[[float(h) for h in torch_input[i]]] for i in range(sequence_len)],
+                ],
+            )
+        ]
     )

     bd = model.build_dataset(dataset)
@@ -102,12 +104,14 @@ def test_rnn_module(input_size, hidden_size, sequence_len, epochs):

     dataset = Dataset(
         [
-            [
-                R.h0[[float(h) for h in h0[0]]],
-                *[R.f(i + 1)[[float(h) for h in torch_input[i]]] for i in range(sequence_len)],
-            ]
-        ],
-        [R.h(sequence_len)[target.detach().numpy().tolist()]],
+            Sample(
+                R.h(sequence_len)[target.detach().numpy().tolist()],
+                [
+                    R.h0[[float(h) for h in h0[0]]],
+                    *[R.f(i + 1)[[float(h) for h in torch_input[i]]] for i in range(sequence_len)],
+                ],
+            ),
+        ]
     )

     bd = model.build_dataset(dataset)
@@ -166,13 +170,15 @@ def test_lstm_module(input_size, hidden_size, sequence_len, epochs):

     dataset = Dataset(
         [
-            [
-                R.c0[[float(c) for c in c0[0]]],
-                R.h0[[float(h) for h in h0[0]]],
-                *[R.f(i + 1)[[float(h) for h in torch_input[i]]] for i in range(sequence_len)],
-            ]
-        ],
-        [R.h(sequence_len)[target.detach().numpy().tolist()]],
+            Sample(
+                R.h(sequence_len)[target.detach().numpy().tolist()],
+                [
+                    R.c0[[float(c) for c in c0[0]]],
+                    R.h0[[float(h) for h in h0[0]]],
+                    *[R.f(i + 1)[[float(h) for h in torch_input[i]]] for i in range(sequence_len)],
+                ],
+            ),
+        ]
     )

     bd = model.build_dataset(dataset)
diff --git a/tests/test_transformer.py b/tests/test_transformer.py
index cbbd7690..235900c9 100644
--- a/tests/test_transformer.py
+++ b/tests/test_transformer.py
@@ -3,7 +3,7 @@
 import torch

 from neuralogic.core import Template, Settings, V, R
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample
 from neuralogic.nn.module import MultiheadAttention


@@ -42,7 +42,7 @@ def test_multiheadattention(qdim: int, kdim: int, vdim: int, num_heads: int, seq
         example.append(R.k(i)[keys[i]])
         example.append(R.q(i)[queries[i]])

-    dataset = Dataset([example], [R.out(i) for i in range(sequence_len)])
+    dataset = Dataset([Sample(R.out(i), example) for i in range(sequence_len)])
     built_dataset = model.build_dataset(dataset)

     torch_results = mha(queries, keys, values)
diff --git a/tests/test_xor_generalization.py b/tests/test_xor_generalization.py
index 6174b27c..0ee98346 100644
--- a/tests/test_xor_generalization.py
+++ b/tests/test_xor_generalization.py
@@ -6,7 +6,7 @@
 from neuralogic import manual_seed
 from neuralogic.nn import get_evaluator
 from neuralogic.core import Settings, R, V, Template, Transformation
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample
 from neuralogic.optim import SGD


@@ -29,12 +29,12 @@ def test_xor_generalization_accurate(n: int, expected: List[int]) -> None:
     template += R.xor_at(0) <= R.val_at(0)
     template += R.xor_at(V.Y)["a":1, 8] <= (R.val_at(V.Y)["b":8, 1], R.xor_at(V.X)["c":8, 1], R.special.next(V.X, V.Y))

-    dataset.add_examples(
+    dataset.add_samples(
         [
-            R.xor_at(1)[0] <= (R.val_at(0)[0], R.val_at(1)[0]),
-            R.xor_at(1)[1] <= (R.val_at(0)[0], R.val_at(1)[1]),
-            R.xor_at(1)[1] <= (R.val_at(0)[1], R.val_at(1)[0]),
-            R.xor_at(1)[0] <= (R.val_at(0)[1], R.val_at(1)[1]),
+            Sample(R.xor_at(1)[0], [R.val_at(0)[0], R.val_at(1)[0]]),
+            Sample(R.xor_at(1)[1], [R.val_at(0)[0], R.val_at(1)[1]]),
+            Sample(R.xor_at(1)[1], [R.val_at(0)[1], R.val_at(1)[0]]),
+            Sample(R.xor_at(1)[0], [R.val_at(0)[1], R.val_at(1)[1]]),
         ]
     )

@@ -50,7 +50,7 @@ def test_xor_generalization_accurate(n: int, expected: List[int]) -> None:
     n_dataset = Dataset()

     for example in products:
-        n_dataset.add_example(R.xor_at(n - 1)[0] <= (R.val_at(i)[int(val)] for i, val in enumerate(example)))
+        n_dataset.add_sample(Sample(R.xor_at(n - 1)[0], [R.val_at(i)[int(val)] for i, val in enumerate(example)]))

     for expected_value, predicted in zip(expected, evaluator.test(n_dataset)):
         assert expected_value == predicted
@@ -90,18 +90,11 @@ def test_xor_generalization(n: int, expected: List[int]) -> None:
     # The training dataset to train xor on two inputs x(0) and x(1), n(1) is means the max index of input is 1
     # x(0, 1) defines which input should be "xor-ed" together
     dataset = Dataset()
-    dataset.add_examples(
-        [
-            [R.xy(0, 1), R.x(0)[0.0], R.x(1)[0.0], R.n(1)],
-            [R.xy(0, 1), R.x(0)[1.0], R.x(1)[0.0], R.n(1)],
-            [R.xy(0, 1), R.x(0)[0.0], R.x(1)[1.0], R.n(1)],
-            [R.xy(0, 1), R.x(0)[1.0], R.x(1)[1.0], R.n(1)],
-        ]
-    )
-
-    # Trarining queries (0, 1, 1, 0)
-    dataset.add_queries([
-        R.xor[0.0], R.xor[1.0], R.xor[1.0], R.xor[0.0],
+    dataset.add_samples([
+        Sample(R.xor[0.0], [R.xy(0, 1), R.x(0)[0.0], R.x(1)[0.0], R.n(1)]),
+        Sample(R.xor[1.0], [R.xy(0, 1), R.x(0)[1.0], R.x(1)[0.0], R.n(1)]),
+        Sample(R.xor[1.0], [R.xy(0, 1), R.x(0)[0.0], R.x(1)[1.0], R.n(1)]),
+        Sample(R.xor[0.0], [R.xy(0, 1), R.x(0)[1.0], R.x(1)[1.0], R.n(1)]),
     ])

     settings = Settings(optimizer=SGD(), epochs=300)
@@ -126,8 +119,7 @@ def test_xor_generalization(n: int, expected: List[int]) -> None:
         fact_example.append(R.n(n - 1))

         # Add example and query to the dataset, the query has some default value (1.0) as we do not care about the label
-        n_dataset.add_example(fact_example)
-        n_dataset.add_query(R.xor)
+        n_dataset.add(R.xor, fact_example)

     # Check that we predicted correct values for n inputs for model trained on 2 inputs
     for expected_value, predicted in zip(expected, neuralogic_evaluator.test(n_dataset)):
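
Usage sketch: taken together, the diffs above replace the parallel examples/queries lists with a single list of Sample objects, each pairing one query with the example it is evaluated against (DatasetBuilder.samples_to_examples_and_queries then regroups samples that share the same example object). The snippet below is a minimal illustration assembled from the new API, with predicate names borrowed from tests/test_quick_start.py; it is illustrative only, not an excerpt from this patch.

    from neuralogic.core import Relation
    from neuralogic.dataset import Dataset, Sample

    # One example (a list of ground facts) can back any number of queries.
    example = [
        Relation.edge(0, 1),
        Relation.edge(1, 0),
        Relation.node_feature(0)[1],
        Relation.node_feature(1)[-1],
    ]

    dataset = Dataset()

    # Explicit Sample construction, mirroring the updated tests above...
    dataset.add_samples(
        [
            Sample(Relation.predict(0)[1], example),
            Sample(Relation.predict(1)[0], example),
        ]
    )

    # ...or the add() shorthand, which wraps the query/example pair
    # in a Sample internally.
    dataset.add(Relation.predict(0)[1], example)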