Skip to content

Commit 2003296

Browse files
committed
Add basic histogram rendering
1 parent f2f869b commit 2003296

File tree

5 files changed

+317
-5
lines changed

5 files changed

+317
-5
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from typing import List, Tuple
2+
3+
4+
def draw_histogram(frequencies: dict[str, int], total: int|None = None) -> None:
    """Print a horizontal bar chart of ``frequencies`` as a rich table.

    Every entry becomes one row with three cells: the count, the name and a
    bar of block characters whose length is the count scaled against a
    reference value (a count equal to the reference yields 40 characters).

    Args:
        frequencies: Mapping of display name to count.
        total: Reference value that corresponds to a 40-character bar. When
            ``None``, the largest count in ``frequencies`` is used.
    """
    # Imported inside the function, so rich is loaded only when a histogram
    # is actually drawn.
    from rich import print
    from rich.table import Table

    bar_width = 40

    # default=0 keeps an empty mapping from raising ValueError in max().
    max_freq = max(frequencies.values(), default=0) if total is None else total

    table = Table(title="Histogram")

    table.add_column("Instances")
    table.add_column("Name", justify="left")
    table.add_column("Chart")

    for key, freq in frequencies.items():
        # A non-positive reference cannot be scaled against: draw an empty
        # bar instead of dividing by zero.
        filled = int(bar_width * freq / max_freq) if max_freq > 0 else 0
        table.add_row(str(freq), key, f"[bold magenta]{'█' * filled}[/]")

    print(table)
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
from typing import List, Optional
2+
from pydantic import BaseModel, PositiveInt
3+
4+
from pyrdfrules.common.result.resultobject import ResultObject
5+
6+
class HistogramSingleResult(ResultObject):
    """
    Histogram single result.

    One entry of a histogram: an amount together with whichever of the
    subject / predicate / object values are present for it.
    """

    amount: PositiveInt
    """Histogram amount.
    """

    object: Optional[str|dict|None] = None
    """RDF object, if available.
    """

    predicate: Optional[str|dict|None] = None
    """RDF predicate, if available.
    """

    subject: Optional[str|dict|None] = None
    """RDF subject, if available.
    """

    def get_histogram_name(self) -> str:
        """Returns the name of the histogram item.

        The name is built from the subject, predicate and object (in that
        order), skipping the ones that are ``None``.

        Returns:
            str: The available parts joined with " - ", or "Unknown" when
            none of them is set.
        """

        # The fields may hold dicts as well as strings; str.join() only
        # accepts strings, so every part is converted before joining.
        parts = [
            str(part)
            for part in (self.subject, self.predicate, self.object)
            if part is not None
        ]

        if not parts:
            return "Unknown"

        return " - ".join(parts)
51+
52+
class HistogramResult(BaseModel):
    """
    Collection of histogram entries.

    Attributes:
        list (List[HistogramSingleResult]): The histogram entries.
    """

    list: List[HistogramSingleResult] = []

    def get_sorted(self, reverse = True) -> List[HistogramSingleResult]:
        """Returns a new list with the entries ordered by their amount.

        Args:
            reverse (bool): If True, the largest amounts come first; if False, the smallest.

        Returns:
            list[HistogramSingleResult]: Entries ordered by amount.
        """

        def amount_of(entry):
            return entry.amount

        return sorted(self.list, key=amount_of, reverse=reverse)

    def get_top(self, n: int) -> List[HistogramSingleResult]:
        """Returns the first n entries of the descending ordering.

        Args:
            n (int): Number of entries to return.

        Returns:
            list[HistogramSingleResult]: The n entries with the largest amounts.
        """

        descending = self.get_sorted()
        return descending[:n]

    def get_bottom(self, n: int) -> List[HistogramSingleResult]:
        """Returns the first n entries of the ascending ordering.

        Args:
            n (int): Number of entries to return.

        Returns:
            list[HistogramSingleResult]: The n entries with the smallest amounts.
        """

        ascending = self.get_sorted(reverse=False)
        return ascending[:n]

    def print(self, top_n: int = 10):
        """Draws the top entries as a histogram.

        Args:
            top_n (int): Number of top entries to draw.
        """

        from pyrdfrules.common.format.histogram import draw_histogram

        # Name -> amount, in descending order of amount.
        draw_histogram({
            entry.get_histogram_name(): entry.amount
            for entry in self.get_top(top_n)
        })

src/pyrdfrules/common/result/result.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
from typing import List
33
from pyrdfrules.common.logging.logger import log
44
from pyrdfrules.common.result.evaluation import Evaluation
5+
from pyrdfrules.common.result.histogram import HistogramResult, HistogramSingleResult
56
from pyrdfrules.common.rule.resultrule import ResultRule
67
from pyrdfrules.common.rule.ruleset import Ruleset
78

8-
99
class Result():
1010
"""Class representing the result of a task.
1111
"""
@@ -18,6 +18,8 @@ class Result():
1818

1919
predictionTasks: List[dict] = None
2020

21+
histogram: HistogramResult = None
22+
2123
data: dict
2224
"""Raw JSON response from RDFRules."""
2325

@@ -40,6 +42,7 @@ def _parse_data(self):
4042
rules = []
4143
predictionTasks = []
4244
evaluate = []
45+
histogram = []
4346

4447
for item in self.data:
4548
log().debug(f"Parsing item: {str(item)}")
@@ -76,6 +79,11 @@ def _parse_data(self):
7679
evaluate.append(Evaluation.model_validate(item))
7780
pass
7881

82+
case {'amount': _, 'subject': __, 'predicate': ___, 'object': ____}:
83+
# Item is a histogram
84+
histogram.append(HistogramSingleResult.model_validate(item))
85+
pass
86+
7987
case _:
8088
log().debug(f"Unknown item: {str(item)}")
8189
print("Unknown item")
@@ -84,6 +92,7 @@ def _parse_data(self):
8492
self.ruleset = Ruleset(rules = rules)
8593
self.predictionTasks = predictionTasks
8694
self.evaluate = evaluate
95+
self.histogram = HistogramResult(list = histogram)
8796

8897
def get_ruleset(self) -> Ruleset:
8998
"""Returns the ruleset generated by RDFRules.
@@ -93,4 +102,9 @@ def get_ruleset(self) -> Ruleset:
93102
def get_evaluations(self) -> List[Evaluation]:
94103
"""Returns the list of evaluations.
95104
"""
96-
return self.evaluate
105+
return self.evaluate
106+
107+
def get_histogram(self) -> HistogramResult:
108+
"""Returns the histogram.
109+
"""
110+
return self.histogram

src/pyrdfrules/rdfrules/pipeline.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,9 @@ class Properties(RDFRulesTaskModel):
213213

214214
class Histogram(RDFRulesTaskModel):
215215
name: Literal["Histogram"] = "Histogram"
216-
subject: bool
217-
predicate: bool
218-
object: bool
216+
subject: Optional[bool] = None
217+
predicate: Optional[bool] = None
218+
object: Optional[bool] = None
219219

220220

221221
class LoadIndex(RDFRulesTaskModel):

src/tests/test_histogram.py

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
"""
2+
[
3+
{
4+
"name": "LoadGraph",
5+
"parameters": {
6+
"path": "/data/wn18rr/train.tsv",
7+
"settings": "tsvParsedUris"
8+
}
9+
},
10+
{
11+
"name": "Histogram",
12+
"parameters": {
13+
"subject": true,
14+
"predicate": false,
15+
"object": false
16+
}
17+
}
18+
]
19+
"""
20+
21+
import time
22+
import unittest
23+
24+
from pyrdfrules.common.http.url import Url
25+
26+
import pyrdfrules.application
27+
from pyrdfrules.common.result.histogram import HistogramResult, HistogramSingleResult
28+
from pyrdfrules.common.task.task import Task
29+
from pyrdfrules.config import Config
30+
import os
31+
32+
33+
import os
34+
import time
35+
import unittest
36+
from unittest.mock import patch, MagicMock
37+
38+
import requests
39+
import pyrdfrules
40+
from pyrdfrules.application import Application
41+
from pyrdfrules.common.result.result import Result
42+
from pyrdfrules.config import Config
43+
from pyrdfrules.config import Config
44+
from pyrdfrules.rdfrules.commondata import ConfidenceType, Constraint, RuleConsumer, RuleConsumerType, Threshold
45+
from pyrdfrules.rdfrules.jsonformats import PrefixFull
46+
from pyrdfrules.rdfrules.pipeline import ComputeConfidence, GetRules, GraphAwareRules, Histogram, Index, LoadGraph, MergeDatasets, AddPrefixes, Mine, Pipeline, SortRuleset
47+
48+
def get_path(file_name):
    """Return the path of ``file_name`` inside the ``data`` directory next to this file."""
    here = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(here, "data", file_name)
50+
51+
# slightly modified from
52+
# https://stackoverflow.com/questions/16694907/download-large-file-in-python-with-requests
53+
def download_file(url, file_name, base_path, timeout=30):
    """Download ``url`` to ``<base_path>/dbpedia_yago/<file_name>`` unless that file exists.

    The ``dbpedia_yago`` directory is created when missing. The response is
    streamed to disk in chunks instead of being held in memory.

    Args:
        url: Address to fetch.
        file_name: Name of the file inside the ``dbpedia_yago`` directory.
        base_path: Directory under which ``dbpedia_yago`` is created.
        timeout: Value passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot block the caller forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    target_dir = os.path.join(base_path, "dbpedia_yago")
    file_path = os.path.join(target_dir, file_name)

    os.makedirs(target_dir, exist_ok=True)

    print(file_path)
    # check if the file already exists
    if os.path.exists(file_path):
        return

    # Stream into a sibling ".part" file and rename it only once the download
    # is complete; otherwise an interrupted download would leave a truncated
    # file that the existence check above accepts on the next run.
    partial_path = file_path + ".part"

    try:
        # NOTE the stream=True parameter below
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, file_path)
    finally:
        # After a successful rename the partial file no longer exists; on any
        # failure the leftover is removed here.
        if os.path.exists(partial_path):
            os.remove(partial_path)
72+
73+
class TestHistogram(unittest.TestCase):
    """Runs a Histogram pipeline against a locally started RDFRules instance."""

    def setUp(self):
        """Download the sample data and start a local RDFRules instance."""
        # download the pipeline files
        self.config = Config(
            workspace_path=os.path.realpath(os.path.join(os.path.dirname((os.path.realpath(__file__))), "..", "rdfrules", "workspace"))
        )

        download_file("http://rdfrules.vse.cz/api/workspace/dbpedia_yago/mappingbased_objects_sample.ttl", "mappingbased_objects_sample.ttl", self.config.workspace_path)

        self.instance = app = pyrdfrules.application.Application()

        self.rdfrules = app.start_local(
            install_jvm = True,
            install_rdfrules = True,
            config = self.config
        )

        return super().setUp()

    def tearDown(self):
        """Stop the application created in setUp."""
        self.instance.stop()
        return super().tearDown()

    def test_histogram(self):
        """
        Runs a pipeline locally and checks the parsed histogram result.
        """

        pipeline = Pipeline(
            tasks=[
                LoadGraph(
                    graphName = "<dbpedia>",
                    path = "/dbpedia_yago/mappingbased_objects_sample.ttl"
                ),
                Histogram(
                    subject=True,
                )
            ]
        )

        task = self.rdfrules.task.create_task(pipeline)

        for step in self.rdfrules.task.run_task(task):
            print(step)
            self.assertIsNotNone(step, "Should not be None")
            self.assertIsInstance(step, Task, "Should be an instance of Task")

        self.assertIsNotNone(task.result, "Should not be None")
        self.assertTrue(task.finished, "Should be finished")

        print(task.result)

        histogram = task.get_result().get_histogram()

        self.assertIsInstance(histogram, HistogramResult)
        # assertGreater / assertEqual report the actual values on failure,
        # unlike assertTrue on a comparison.
        self.assertGreater(len(histogram.list), 0)

        for item in histogram.list:
            self.assertIsInstance(item, HistogramSingleResult)

        top_ten_items = histogram.get_top(10)

        self.assertEqual(len(top_ten_items), 10)

        print(top_ten_items)

        histogram.print(top_n=10)
151+
152+
if __name__ == '__main__':
    # unittest.main() exits the interpreter when the run finishes, so a
    # single guard is sufficient.
    unittest.main()

0 commit comments

Comments
 (0)