Skip to content

Commit 278c13a

Browse files
committed
Add v4 implementation (not working)
1 parent c1bc653 commit 278c13a

File tree

1 file changed

+124
-0
lines changed

1 file changed

+124
-0
lines changed

datastew/repository/weaviate.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import shutil
2+
from typing import List, Union
3+
4+
import uuid as uuid
5+
import weaviate
6+
7+
from weaviate.classes.config import Property, DataType, ReferenceProperty
8+
9+
from datastew import BaseRepository
10+
from datastew.repository import Mapping, Terminology, Concept
11+
from datastew.repository.weaviate_schema import terminology_schema, concept_schema, mapping_schema
12+
13+
14+
class WeaviateRepository(BaseRepository):
    """Repository that persists terminologies, concepts and mappings in Weaviate.

    Three collections are used: ``Terminology``, ``Concept`` (with a
    ``hasTerminology`` reference) and ``Mapping`` (with a ``hasConcept``
    reference and the embedding stored as the object vector).

    Supported modes:

    * ``"memory"`` - embedded instance whose data directory is removed again
      by :meth:`shut_down`.
    * ``"disk"``   - embedded instance persisting to ``path``.
    * ``"remote"`` - client created from the URL given in ``path``.
    """

    # Data directory of the embedded instance in "memory" mode. Kept in one
    # place so that __init__ and shut_down cannot drift apart.
    _MEMORY_DATA_PATH = "db"

    def __init__(self, mode="memory", path=None):
        """Connect to Weaviate and make sure the required collections exist.

        :param mode: One of ``"memory"``, ``"disk"`` or ``"remote"``.
        :param path: Data directory (``"disk"``) or URL (``"remote"``);
            unused in ``"memory"`` mode.
        :raises ConnectionError: If the client cannot be created. Invalid
            ``mode``/``path`` combinations are reported through this exception
            as well; the original error is available as ``__cause__``.
        """
        self.mode = mode
        try:
            if mode == "memory":
                self.client = weaviate.connect_to_embedded(persistence_data_path=self._MEMORY_DATA_PATH)
            elif mode == "disk":
                if path is None:
                    raise ValueError("Path must be provided for disk mode.")
                self.client = weaviate.connect_to_embedded(persistence_data_path=path)
            elif mode == "remote":
                if path is None:
                    raise ValueError("Remote URL must be provided for remote mode.")
                # NOTE(review): the other modes use the connect_to_* helpers,
                # and the rest of this class relies on `client.collections`.
                # `weaviate.Client` looks like the legacy client, which
                # presumably lacks that attribute - confirm and switch to a
                # connect_to_* helper once host/port handling is decided.
                self.client = weaviate.Client(path)
            else:
                raise ValueError(f'Repository mode {mode} is not defined. Use either memory, disk or remote.')
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise ConnectionError(f"Failed to initialize Weaviate client: {e}") from e
        self.default_collection = self.client.collections.get("default")

        # Add schemas to Weaviate. Collections are only created when missing,
        # so a persistent ("disk"/"remote") repository can be opened again.
        self.terminologies = self._get_or_create_collection(
            name="Terminology",
            properties=[
                Property(name="name", data_type=DataType.TEXT),
            ],
        )
        self.concepts = self._get_or_create_collection(
            name="Concept",
            properties=[
                Property(name="conceptID", data_type=DataType.TEXT),
                Property(name="prefLabel", data_type=DataType.TEXT),
            ],
            references=[
                ReferenceProperty(
                    name="hasTerminology",
                    target_collection="Terminology",
                ),
            ],
        )
        self.mappings = self._get_or_create_collection(
            name="Mapping",
            properties=[
                Property(name="text", data_type=DataType.TEXT),
            ],
            references=[
                ReferenceProperty(
                    name="hasConcept",
                    target_collection="Concept",
                ),
            ],
        )

    def _get_or_create_collection(self, name, properties, references=None):
        """Return the collection ``name``, creating it first if it is missing."""
        collections = self.client.collections
        if collections.exists(name):
            return collections.get(name)
        return collections.create(name=name, properties=properties, references=references)

    @staticmethod
    def _object_uuid(kind, identifier):
        """Derive a stable Weaviate object UUID from a type name and natural ID.

        The same ``(kind, identifier)`` pair always yields the same UUID, so a
        reference can be built from the referenced object's own identifier
        without having to remember a randomly generated UUID on the instance.
        """
        return uuid.uuid5(uuid.NAMESPACE_URL, f"{kind}:{identifier}")

    def store_all(self, model_object_instances):
        """Store every instance of the iterable via :meth:`store`, in order."""
        for instance in model_object_instances:
            self.store(instance)

    def get_all_concepts(self) -> List[Concept]:
        """Not implemented yet; currently returns ``None``."""
        pass

    def get_all_terminologies(self) -> List[Terminology]:
        """Not implemented yet; currently returns ``None``."""
        pass

    def get_all_mappings(self, limit=1000) -> List[Mapping]:
        """Fetch up to ``limit`` mapping objects.

        Not finished yet: the fetched objects are not converted into
        ``Mapping`` instances and the method currently returns ``None``.
        """
        # TODO: convert the result into Mapping instances and return them.
        # Cross-references are only part of a query result when they are
        # requested explicitly (see the `return_references` argument of
        # `fetch_objects`), which is why they do not show up here yet.
        self.mappings.query.fetch_objects(limit=limit)

    def get_closest_mappings(self, embedding, limit=5):
        """Not implemented yet; currently returns ``None``."""
        pass

    def shut_down(self):
        """Close the client and, in ``"memory"`` mode, remove its data directory.

        The client is closed before the directory is deleted so the data is no
        longer in use, and a missing directory does not prevent the close.
        """
        try:
            self.client.close()
        finally:
            if self.mode == "memory":
                shutil.rmtree(self._MEMORY_DATA_PATH, ignore_errors=True)

    def store(self, model_object_instance: Union[Terminology, Concept, Mapping]):
        """Insert a terminology, concept or mapping into its collection.

        Terminologies and concepts get a deterministic UUID derived from their
        own identifier (``id`` / ``concept_id``), so references from concepts
        and mappings resolve to the right object and the model instances are
        not modified. Storing the same terminology or concept twice therefore
        targets the same object UUID. Mappings get a random UUID.

        :param model_object_instance: The object to store.
        :raises RuntimeError: If the type is unsupported or the insert fails;
            the original error is available as ``__cause__``.
        """
        try:
            if isinstance(model_object_instance, Terminology):
                self.terminologies.data.insert(
                    properties={"name": model_object_instance.name},
                    uuid=self._object_uuid("Terminology", model_object_instance.id),
                )
            elif isinstance(model_object_instance, Concept):
                properties = {
                    "conceptID": model_object_instance.concept_id,
                    "prefLabel": model_object_instance.pref_label,
                }
                references = {
                    "hasTerminology": self._object_uuid("Terminology", model_object_instance.terminology.id),
                }
                self.concepts.data.insert(
                    properties=properties,
                    references=references,
                    uuid=self._object_uuid("Concept", model_object_instance.concept_id),
                )
            elif isinstance(model_object_instance, Mapping):
                references = {
                    "hasConcept": self._object_uuid("Concept", model_object_instance.concept.concept_id),
                }
                self.mappings.data.insert(
                    properties={"text": model_object_instance.text},
                    vector=model_object_instance.embedding,
                    references=references,
                    # Several mappings may share the same text, so there is no
                    # natural key to derive a stable UUID from.
                    uuid=uuid.uuid4(),
                )
            else:
                raise ValueError("Unsupported model object instance type.")
        except Exception as e:
            raise RuntimeError(f"Failed to store object in Weaviate: {e}") from e

0 commit comments

Comments
 (0)