forked from thomashacker/weaviate-magic-chat-demo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinitialize_data.py
64 lines (52 loc) · 2.31 KB
/
initialize_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json
import weaviate
import weaviate.classes.config as wc
from weaviate_cluster.client.client import connect_to_cluster
def _read_json(path: str):
    """Return the decoded content of the JSON file at ``path``."""
    # JSON text is UTF-8 by specification; pinning the encoding keeps the
    # result independent of the platform's locale default.
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)


def _insert_all(client: weaviate.WeaviateClient, collection_name: str, objects: list) -> None:
    """Insert ``objects`` into ``collection_name`` and report rejected objects."""
    collection = client.collections.get(collection_name)
    response = collection.data.insert_many(objects)
    # insert_many returns per-object failures in ``errors`` (keyed by the
    # object's index in the input) instead of raising for partial failures;
    # print them so a partially loaded collection does not go unnoticed.
    for index, error in response.errors.items():
        print(f"Failed to insert object {index} into {collection_name}: {error.message}")


def load_data(client: weaviate.WeaviateClient) -> None:
    """Load the bundled JSON datasets into their Weaviate collections.

    Reads ``weaviate_cluster/data/galaxy.json`` into the ``Solutions``
    collection and ``weaviate_cluster/data/conference_talks.json`` into the
    ``ConferenceTalks`` collection. Both paths are relative to the current
    working directory, and both collections must already exist.

    Args:
        client: Connected Weaviate client used to look up the collections.

    Raises:
        OSError: If a data file cannot be opened.
        json.JSONDecodeError: If a data file is not valid JSON.
        KeyError: If a record lacks one of the expected fields.
    """
    # The galaxy file uses capitalized keys; map them to the lower-case
    # property names stored in the collection.
    solution_objs = [
        {
            "phase": d["Phase"],
            "question": d["Question"],
            "answer": d["Answer"],
            "categories": d["Categories"],
            "solution": d["Solution"],
            "link": d["Link"],
            "price": d["Price"],
        }
        for d in _read_json('weaviate_cluster/data/galaxy.json')
    ]
    _insert_all(client, "Solutions", solution_objs)

    # Copy only the known fields so any extra keys in the file are not sent.
    conf_talk_objects = [
        {
            "title": d["title"],
            "time": d["time"],
            "speaker": d["speaker"],
            "company": d["company"],
            "keywords": d["keywords"],
            "abstract": d["abstract"],
        }
        for d in _read_json('weaviate_cluster/data/conference_talks.json')
    ]
    _insert_all(client, "ConferenceTalks", conf_talk_objects)
def main() -> None:
    """Recreate the demo collections and populate them with data.

    Connects to the cluster, then for ``ConferenceTalks`` and ``Solutions``
    (in that order) deletes the collection and creates it again with the
    OpenAI vectorizer and generative configuration. Finally calls
    ``load_data`` with the still-open client.

    Raises:
        Exception: If the client reports that the cluster is not ready.
    """
    print("=== LOADING HITCHHIKER DATA ===")
    with connect_to_cluster() as client:
        if not client.is_ready():
            raise Exception("Weaviate cluster is not ready!")

        # Each collection is deleted and then created again before loading.
        for collection_name in ("ConferenceTalks", "Solutions"):
            client.collections.delete(name=collection_name)
            client.collections.create(
                name=collection_name,
                # With a "none" vectorizer, vectors must always be supplied
                # by the caller; any other "text2vec-*" module could be used
                # here instead.
                vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(),
                # Use the `generative-openai` module for generative queries.
                generative_config=wc.Configure.Generative.openai(),
            )

        load_data(client)


# Run the initialization only when executed as a script, not on import.
if __name__ == "__main__":
    main()