Introduce Sample as entry for Dataset

LukasZahradnik committed on Nov 26, 2023
commit b6e7c75 (1 parent: 1f58fce)
Showing 28 changed files with 347 additions and 378 deletions.
diff --git a/benchmarks/pyneuralogic_benchmark.py b/benchmarks/pyneuralogic_benchmark.py
@@ -167,7 +167,7 @@ def evaluate(model, dataset, steps, dataset_loc, dim, task: Task):
         dataset.number_of_classes = task.output_size
         dataset.one_hot_encoding = True
 
-    built_dataset = model.build_dataset(dataset, file_mode=True)
+    built_dataset = model.build_dataset(dataset)
 
     build_time = time.perf_counter() - start_time
     start_time = time.perf_counter()

diff --git a/examples/datasets/horses.py b/examples/datasets/horses.py
@@ -9,26 +9,20 @@
 
 template.add_rules(
     [
-        Relation.foal(Var.X)[1,] <= (Relation.parent(Var.X, Var.Y), Relation.horse(Var.Y)), # todo gusta: mozna prejmenovat Atom -> Predicate by odpovidalo skutecnosti prirozeneji?
-        Relation.foal(Var.X)[1,] <= (Relation.sibling(Var.X, Var.Y), Relation.horse(Var.Y)),
-        Relation.negFoal(Var.X)[1,] <= Relation.foal(Var.X),
+        Relation.foal(Var.X)[1, ] <= (Relation.parent(Var.X, Var.Y), Relation.horse(Var.Y)),  # todo gusta: mozna prejmenovat Atom -> Predicate by odpovidalo skutecnosti prirozeneji?
+        Relation.foal(Var.X)[1, ] <= (Relation.sibling(Var.X, Var.Y), Relation.horse(Var.Y)),
+        Relation.negFoal(Var.X)[1, ] <= Relation.foal(Var.X),
     ]
 )
 
-dataset.add_example(
-    [
-        Relation.horse(Constant.aida)[1.0],
-        Relation.horse(Constant.cheyenne)[1.0],
-        Relation.horse(Constant.dakotta)[1.0],
-        Relation.parent(Constant.star, Constant.cheyenne)[1.0],
-        Relation.parent(Constant.star, Constant.aida)[1.0],
-        Relation.parent(Constant.star, Constant.dakotta)[1.0],
-    ]
-)
+example = [
+    Relation.horse(Constant.aida)[1.0],
+    Relation.horse(Constant.cheyenne)[1.0],
+    Relation.horse(Constant.dakotta)[1.0],
+    Relation.parent(Constant.star, Constant.cheyenne)[1.0],
+    Relation.parent(Constant.star, Constant.aida)[1.0],
+    Relation.parent(Constant.star, Constant.dakotta)[1.0],
+]
 
-dataset.add_queries(
-    [
-        Relation.foal(Constant.star)[1.0],
-        Relation.negFoal(Constant.star)[0.0],
-    ]
-)
+dataset.add(Relation.foal(Constant.star)[1.0], example)
+dataset.add(Relation.negFoal(Constant.star)[0.0], example)
diff --git a/examples/datasets/multiple_examples_no_order_trains.py b/examples/datasets/multiple_examples_no_order_trains.py
@@ -18,20 +18,20 @@
 loadshapes = [Constant.hexagon, Constant.triangle, Constant.diamond, Constant.rectangle, Constant.circle]
 vagon_atoms = [Relation.shape, Relation.length, Relation.sides, Relation.wheels, Relation.loadnum, Relation.loadshape, Relation.roof]
 
-Y = Var.Y   #todo gusta: tohle je dobry trik, ten bych pouzival na vic mistech, a podobne pro Atom/Predicate factories udelat zkratky (treba P.)
+Y = Var.Y  # todo gusta: tohle je dobry trik, ten bych pouzival na vic mistech, a podobne pro Atom/Predicate factories udelat zkratky (treba P.)
 
 template.add_rules(
     [
-        *[Relation.shape(Y) <= Relation.shape(Y, s)[1,] for s in shapes],
-        *[Relation.length(Y) <= Relation.length(Y, s)[1,] for s in [Constant.short, Constant.long]],
-        *[Relation.sides(Y) <= Relation.sides(Y, s)[1,] for s in [Constant.not_double, Constant.double]],
-        *[Relation.roof(Y) <= Relation.roof(Y, s)[1,] for s in roofs],
-        *[Relation.wheels(Y) <= Relation.wheels(Y, s)[1,] for s in [2, 3]],
-        *[Relation.loadnum(Y) <= Relation.loadnum(Y, s)[1,] for s in [0, 1, 2, 3]],
-        *[Relation.loadshape(Y) <= Relation.loadshape(Y, s)[1,] for s in loadshapes],
-        Relation.vagon(Y) <= (atom(Y)[1,] for atom in vagon_atoms),
-        Relation.train <= Relation.vagon(Y)[1,],
-        Relation.direction <= Relation.train[1,],
+        *[Relation.shape(Y) <= Relation.shape(Y, s)[1, ] for s in shapes],
+        *[Relation.length(Y) <= Relation.length(Y, s)[1, ] for s in [Constant.short, Constant.long]],
+        *[Relation.sides(Y) <= Relation.sides(Y, s)[1, ] for s in [Constant.not_double, Constant.double]],
+        *[Relation.roof(Y) <= Relation.roof(Y, s)[1, ] for s in roofs],
+        *[Relation.wheels(Y) <= Relation.wheels(Y, s)[1, ] for s in [2, 3]],
+        *[Relation.loadnum(Y) <= Relation.loadnum(Y, s)[1, ] for s in [0, 1, 2, 3]],
+        *[Relation.loadshape(Y) <= Relation.loadshape(Y, s)[1, ] for s in loadshapes],
+        Relation.vagon(Y) <= (atom(Y)[1, ] for atom in vagon_atoms),
+        Relation.train <= Relation.vagon(Y)[1, ],
+        Relation.direction <= Relation.train[1, ],
     ]
 )
 
@@ -52,5 +52,8 @@
         ]
     )
 
-dataset.add_examples(examples)
-dataset.add_queries([*[Relation.direction[1.0] for _ in range(1, 11)], *[Relation.direction[-1.0] for _ in range(11, 21)]])
+for example in examples[:10]:
+    dataset.add(Relation.direction[1.0], example)
+
+for example in examples[10:]:
+    dataset.add(Relation.direction[-1.0], example)
diff --git a/examples/datasets/multiple_examples_trains.py b/examples/datasets/multiple_examples_trains.py
@@ -21,16 +21,16 @@
 
 template.add_rules(
     [
-        *[Relation.shape(Y) <= Relation.shape(Y, s)[1,] for s in shapes],
-        *[Relation.length(Y) <= Relation.length(Y, s)[1,] for s in [Constant.short, Constant.long]],
-        *[Relation.sides(Y) <= Relation.sides(Y, s)[1,] for s in [Constant.not_double, Constant.double]],
-        *[Relation.roof(Y) <= Relation.roof(Y, s)[1,] for s in roofs],
-        *[Relation.wheels(Y) <= Relation.wheels(Y, s)[1,] for s in [2, 3]],
-        *[Relation.loadnum(Y) <= Relation.loadnum(Y, s)[1,] for s in [0, 1, 2, 3]],
-        *[Relation.loadshape(Y) <= Relation.loadshape(Y, s)[1,] for s in loadshapes],
-        Relation.vagon(Y) <= (atom(Y)[1,] for atom in vagon_atoms),
-        *[Relation.train <= Relation.vagon(i)[1,] for i in [1, 2, 3, 4]],
-        Relation.direction <= Relation.train[1,],
+        *[Relation.shape(Y) <= Relation.shape(Y, s)[1, ] for s in shapes],
+        *[Relation.length(Y) <= Relation.length(Y, s)[1, ] for s in [Constant.short, Constant.long]],
+        *[Relation.sides(Y) <= Relation.sides(Y, s)[1, ] for s in [Constant.not_double, Constant.double]],
+        *[Relation.roof(Y) <= Relation.roof(Y, s)[1, ] for s in roofs],
+        *[Relation.wheels(Y) <= Relation.wheels(Y, s)[1, ] for s in [2, 3]],
+        *[Relation.loadnum(Y) <= Relation.loadnum(Y, s)[1, ] for s in [0, 1, 2, 3]],
+        *[Relation.loadshape(Y) <= Relation.loadshape(Y, s)[1, ] for s in loadshapes],
+        Relation.vagon(Y) <= (atom(Y)[1, ] for atom in vagon_atoms),
+        *[Relation.train <= Relation.vagon(i)[1, ] for i in [1, 2, 3, 4]],
+        Relation.direction <= Relation.train[1, ],
     ]
 )
 
@@ -50,5 +50,9 @@
         ]
     )
 
-dataset.add_examples(examples)
-dataset.add_queries([*[Relation.direction[1.0] for _ in range(1, 11)], *[Relation.direction[-1.0] for _ in range(11, 21)]])
+
+for example in examples[:10]:
+    dataset.add(Relation.direction[1.0], example)
+
+for example in examples[10:]:
+    dataset.add(Relation.direction[-1.0], example)
diff --git a/examples/datasets/naive_trains.py b/examples/datasets/naive_trains.py
@@ -21,35 +21,35 @@
 
 template.add_rules(
     [
-        *[Relation.shape(X, Y) <= Relation.shape(X, Y, s)[1,] for s in shapes],
-        *[Relation.length(X, Y) <= Relation.length(X, Y, s)[1,] for s in [Constant.short, Constant.long]],
-        *[Relation.sides(X, Y) <= Relation.sides(X, Y, s)[1,] for s in [Constant.not_double, Constant.double]],
-        *[Relation.roof(X, Y) <= Relation.roof(X, Y, s)[1,] for s in roofs],
-        *[Relation.wheels(X, Y) <= Relation.wheels(X, Y, s)[1,] for s in [2, 3]],
-        *[Relation.loadnum(X, Y) <= Relation.loadnum(X, Y, s)[1,] for s in [0, 1, 2, 3]],
-        *[Relation.loadshape(X, Y) <= Relation.loadshape(X, Y, s)[1,] for s in loadshapes],
-        Relation.vagon(X, Y) <= (atom(X, Y)[1,] for atom in vagon_atoms),
-        *[Relation.train(X) <= Relation.vagon(X, i)[1,] for i in [1, 2, 3, 4]],
-        Relation.direction(X) <= Relation.train(X)[1,],
+        *[Relation.shape(X, Y) <= Relation.shape(X, Y, s)[1, ] for s in shapes],
+        *[Relation.length(X, Y) <= Relation.length(X, Y, s)[1, ] for s in [Constant.short, Constant.long]],
+        *[Relation.sides(X, Y) <= Relation.sides(X, Y, s)[1, ] for s in [Constant.not_double, Constant.double]],
+        *[Relation.roof(X, Y) <= Relation.roof(X, Y, s)[1, ] for s in roofs],
+        *[Relation.wheels(X, Y) <= Relation.wheels(X, Y, s)[1, ] for s in [2, 3]],
+        *[Relation.loadnum(X, Y) <= Relation.loadnum(X, Y, s)[1, ] for s in [0, 1, 2, 3]],
+        *[Relation.loadshape(X, Y) <= Relation.loadshape(X, Y, s)[1, ] for s in loadshapes],
+        Relation.vagon(X, Y) <= (atom(X, Y)[1, ] for atom in vagon_atoms),
+        *[Relation.train(X) <= Relation.vagon(X, i)[1, ] for i in [1, 2, 3, 4]],
+        Relation.direction(X) <= Relation.train(X)[1, ],
     ]
 )
 
-dataset.add_example(
-    [
-        atom
-        for _, id, pos, shape, length, sides, roof, wheels, load, loadnum in train_example_data
-        for atom in [
-            Relation.shape(id, pos, shape),
-            Relation.length(id, pos, length),
-            Relation.sides(id, pos, sides),
-            Relation.roof(id, pos, roof),
-            Relation.wheels(id, pos, wheels),
-            Relation.loadshape(id, pos, load),
-            Relation.loadnum(id, pos, loadnum),
-        ]
+example = [
+    atom
+    for _, id, pos, shape, length, sides, roof, wheels, load, loadnum in train_example_data
+    for atom in [
+        Relation.shape(id, pos, shape),
+        Relation.length(id, pos, length),
+        Relation.sides(id, pos, sides),
+        Relation.roof(id, pos, roof),
+        Relation.wheels(id, pos, wheels),
+        Relation.loadshape(id, pos, load),
+        Relation.loadnum(id, pos, loadnum),
     ]
-)
+]
 
-dataset.add_queries(
-    [*[Relation.direction(i)[1.0] for i in range(1, 11)], *[Relation.direction(i)[-1.0] for i in range(11, 21)]]
-)
+for i in range(1, 11):
+    dataset.add(Relation.direction(i)[1.0], example)
+
+for i in range(11, 21):
+    dataset.add(Relation.direction(i)[-1.0], example)
diff --git a/examples/datasets/naive_xor.py b/examples/datasets/naive_xor.py
@@ -1,5 +1,5 @@
 from neuralogic.core import Relation, Template
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample
 
 
 dataset = Dataset()
@@ -8,16 +8,16 @@
 # fmt: off
 
 # hidden<1-8> :- {1} a, {1} b.
-template.add_rules([Relation.get(f"hidden{i}") <= (Relation.a[1,], Relation.b[1,]) for i in range(1, 9)])
+template.add_rules([Relation.get(f"hidden{i}") <= (Relation.a[1, ], Relation.b[1, ]) for i in range(1, 9)])
 
 # {1} xor :- hidden<1-8>.
-template.add_rules([Relation.xor[1,] <= Relation.get(f"hidden{i}") for i in range(1, 9)])
+template.add_rules([Relation.xor[1, ] <= Relation.get(f"hidden{i}") for i in range(1, 9)])
 
-dataset.add_examples(
+dataset.add_samples(
     [  # Add 4 examples
-        Relation.xor[0] <= (Relation.a[0], Relation.b[0]),
-        Relation.xor[1] <= (Relation.a[1], Relation.b[0]),
-        Relation.xor[1] <= (Relation.a[0], Relation.b[1]),
-        Relation.xor[0] <= (Relation.a[1], Relation.b[1]),
+        Sample(Relation.xor[0], [Relation.a[0], Relation.b[0]]),
+        Sample(Relation.xor[1], [Relation.a[1], Relation.b[0]]),
+        Sample(Relation.xor[1], [Relation.a[0], Relation.b[1]]),
+        Sample(Relation.xor[0], [Relation.a[1], Relation.b[1]]),
     ]
 )
diff --git a/examples/datasets/vectorized_xor.py b/examples/datasets/vectorized_xor.py
@@ -1,5 +1,5 @@
 from neuralogic.core import Relation, Template
-from neuralogic.dataset import Dataset
+from neuralogic.dataset import Dataset, Sample
 
 
 dataset = Dataset()
@@ -9,11 +9,11 @@
 
 template.add_rule(Relation.xor[1, 8] <= Relation.xy[8, 2])  # Add template rule
 
-dataset.add_examples(
+dataset.add_samples(
     [  # Add 4 examples
-        Relation.xor[0] <= Relation.xy[[0, 0]],
-        Relation.xor[1] <= Relation.xy[[0, 1]],
-        Relation.xor[1] <= Relation.xy[[1, 0]],
-        Relation.xor[0] <= Relation.xy[[1, 1]],
+        Sample(Relation.xor[0], Relation.xy[[0, 0]]),
+        Sample(Relation.xor[1], Relation.xy[[0, 1]]),
+        Sample(Relation.xor[1], Relation.xy[[1, 0]]),
+        Sample(Relation.xor[0], Relation.xy[[1, 1]]),
     ]
 )
diff --git a/neuralogic/core/builder/__init__.py b/neuralogic/core/builder/__init__.py
@@ -1,5 +1,5 @@
 from neuralogic.core.builder.builder import Builder
 from neuralogic.core.builder.dataset_builder import DatasetBuilder
-from neuralogic.core.builder.components import Neuron, Weight, Sample, BuiltDataset, GroundedDataset
+from neuralogic.core.builder.components import Neuron, Weight, BuiltDataset, GroundedDataset
 
-__all__ = ["Builder", "DatasetBuilder", "Neuron", "Weight", "Sample", "BuiltDataset", "GroundedDataset"]
+__all__ = ["Builder", "DatasetBuilder", "Neuron", "Weight", "BuiltDataset", "GroundedDataset"]
diff --git a/neuralogic/core/builder/builder.py b/neuralogic/core/builder/builder.py
@@ -4,7 +4,7 @@
 from tqdm.autonotebook import tqdm
 
 from neuralogic import is_initialized, initialize
-from neuralogic.core.builder.components import Sample, RawSample
+from neuralogic.core.builder.components import NeuralSample
 from neuralogic.core.settings import SettingsProxy
 from neuralogic.core.sources import Sources
 
@@ -58,7 +58,7 @@ def ground_from_sources(self, parsed_template, sources: Sources):
     def ground_from_logic_samples(self, parsed_template, logic_samples):
         return self._ground(parsed_template, None, logic_samples)
 
-    def _ground(self, parsed_template, sources: Optional[Sources], logic_samples) -> List[RawSample]:
+    def _ground(self, parsed_template, sources: Optional[Sources], logic_samples) -> List[NeuralSample]:
         if sources is not None:
             ground_pipeline = self.example_builder.buildGroundings(parsed_template, sources.sources)
         else:
@@ -69,20 +69,20 @@ def _ground(self, parsed_template, sources: Optional[Sources], logic_samples) ->
 
         return ground_pipeline.get()
 
-    def neuralize(self, groundings, progress: bool, length: Optional[int]) -> List[RawSample]:
+    def neuralize(self, groundings, progress: bool, length: Optional[int]) -> List[NeuralSample]:
         if not progress:
             return self._neuralize(groundings, None)
         with tqdm(total=length, desc="Building", unit=" samples", dynamic_ncols=True) as pbar:
             return self._neuralize(groundings, self._callback(pbar))
 
-    def _neuralize(self, groundings, callback) -> List[RawSample]:
+    def _neuralize(self, groundings, callback) -> List[NeuralSample]:
         neuralize_pipeline = self.example_builder.neuralize(groundings, None)
         neuralize_pipeline.execute(None)
 
         samples = neuralize_pipeline.get()
         logic_samples = samples.collect(self.collectors.toList())
 
-        return [RawSample(sample, None) for sample in logic_samples]
+        return [NeuralSample(sample, None) for sample in logic_samples]
 
     def build_model(self, parsed_template, settings: SettingsProxy):
         neural_model = self.neural_model(parsed_template.getAllWeights(), settings.settings)
@@ -95,14 +95,6 @@ def get_builders(settings: SettingsProxy):
 
         return builder
 
-    @staticmethod
-    def build(samples):
-        serializer = jpype.JClass("cz.cvut.fel.ida.neural.networks.structure.export.NeuralSerializer")()
-        super_detailed_format = jpype.JClass("cz.cvut.fel.ida.setup.Settings").superDetailedNumberFormat
-        serializer.numberFormat = super_detailed_format
-
-        return [Sample(serializer.serialize(sample), sample) for sample in samples]
-
     @staticmethod
     def _get_spinner_text(count: int) -> str:
         if count == 1:

diff --git a/neuralogic/core/builder/components.py b/neuralogic/core/builder/components.py
@@ -8,7 +8,7 @@
 from neuralogic.utils.visualize import draw_sample, draw_grounding
 
 
-class RawSample:
+class NeuralSample:
     __slots__ = "java_sample", "fact_cache", "grounding"
 
     def __init__(self, sample, grounding):
@@ -70,30 +70,6 @@ def draw(
         return draw_sample(self, filename, show, img_type, value_detail, graphviz_path, *args, **kwargs)
 
 
-class Sample(RawSample):
-    __slots__ = ("id", "target", "neurons", "output_neuron", "java_sample")
-
-    def __init__(self, sample, java_sample):
-        super().__init__(sample, None)
-        serialized_sample = json.loads(str(sample.exportToJson()))
-
-        self.id = serialized_sample["id"]
-        self.target = json.loads(serialized_sample["target"])
-        self.neurons = Sample.deserialize_network(serialized_sample["network"])
-        self.output_neuron = self.neurons[-1].index
-        self.java_sample = java_sample
-
-    @staticmethod
-    def deserialize_network(network):
-        neurons = []
-
-        for i, neuron in enumerate(network):
-            neuron_object = Neuron(neuron, i)
-            neurons.append(neuron_object)
-
-        return neurons
-
-
 class Neuron:
     def __init__(self, neuron: Dict[str, Any], index):
         self.index = index
@@ -159,7 +135,7 @@ class BuiltDataset:
 
     __slots__ = "samples", "batch_size"
 
-    def __init__(self, samples: List[RawSample], batch_size: int):
+    def __init__(self, samples: List[NeuralSample], batch_size: int):
         self.samples = samples
         self.batch_size = batch_size