Commit 7cb732e

crawler API
1 parent be87ba6 commit 7cb732e

36 files changed (+738, -344 lines)

backend/backend.py

Lines changed: 5 additions & 5 deletions
@@ -17,12 +17,12 @@
 config["data_dir"] = "./data/testing_data/"

 # load the persistent database using ChromaDB
-print('Loading DB')
+print("Loading DB")
 client = chromadb.PersistentClient(path=config["persist_dir"])
 # Loading the metadata for all types

 # Setup llm chain, initialize the retriever and llm, and setup Retrieval QA
-print('Setting LLM chain')
+print("Setting LLM chain")
 qa_dataset_handler = QASetup(
     config=config,
     data_type="dataset",
@@ -43,7 +43,7 @@
 llm_chain_handler = LLMChainCreator(config=config, local=True)
 llm_chain_handler.enable_cache()
 llm_chain = llm_chain_handler.get_llm_chain()
-print('OK.')
+print("OK.")

 # Send test query as first query to avoid cold start
 try:
@@ -68,7 +68,7 @@ async def read_dataset(query: str):
     _, ids_order = QueryProcessor(
         query=query,
         qa=qa_dataset,
-        type_of_query='dataset',
+        type_of_query="dataset",
         config=config,
     ).get_result_from_query()

@@ -89,7 +89,7 @@ async def read_flow(query: str):
     _, ids_order = QueryProcessor(
         query=query,
         qa=qa_flow,
-        type_of_query='flow',
+        type_of_query="flow",
         config=config,
     ).get_result_from_query()

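For orientation, a rough client-side sketch of how these two endpoints might be called. Only the handler signatures `read_dataset(query: str)` and `read_flow(query: str)` are visible in the diff; the route paths, port, and response shape below are assumptions.

```python
# Hedged client sketch: route path, port, and response shape are assumptions,
# not confirmed by this commit -- check the FastAPI decorators in backend/backend.py.
import requests

BASE_URL = "http://localhost:8000"  # assumed uvicorn default


def search_datasets(query: str):
    # assumed route for the read_dataset handler
    response = requests.get(f"{BASE_URL}/dataset/{query}")
    response.raise_for_status()
    return response.json()  # presumably the ordered ids from QueryProcessor


if __name__ == "__main__":
    print(search_datasets("mushroom classification"))
```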
backend/modules/metadata_utils.py

Lines changed: 2 additions & 0 deletions
@@ -20,6 +20,7 @@ class OpenMLObjectHandler:
     """
     Description: The base class for handling OpenML objects. The logic for handling datasets/flows are subclasses from this.
     """
+
     def __init__(self, config):
         self.config = config

@@ -268,6 +269,7 @@ class OpenMLMetadataProcessor:
     """
     Description: Process metadata using the OpenMLHandlers
     """
+
     def __init__(self, config: dict):
         self.config = config
         self.save_filename = os.path.join(

backend/modules/rag_llm.py

Lines changed: 7 additions & 1 deletion
@@ -26,14 +26,18 @@ class LLMChainInitializer:
     """
     Description: Setup the vectordb (Chroma) as a retriever with parameters
     """
+
     @staticmethod
     def initialize_llm_chain(
         vectordb: Chroma, config: dict
     ) -> langchain.chains.retrieval_qa.base.RetrievalQA:
         if config["search_type"] == "similarity_score_threshold":
             return vectordb.as_retriever(
                 search_type=config["search_type"],
-                search_kwargs={"k": config["num_return_documents"], "score_threshold": 0.5},
+                search_kwargs={
+                    "k": config["num_return_documents"],
+                    "score_threshold": 0.5,
+                },
             )
         else:
             return vectordb.as_retriever(
@@ -46,6 +50,7 @@ class QASetup:
     """
     Description: Setup the VectorDB, QA and initalize the LLM for each type of data
     """
+
     def __init__(
         self, config: dict, data_type: str, client: ClientAPI, subset_ids: list = None
     ):
@@ -80,6 +85,7 @@ class LLMChainCreator:
     """
     Description: Gets Ollama, sends query, enables query caching
     """
+
     def __init__(self, config: dict, local: bool = False):
         self.config = config
         self.local = local
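The retriever behaviour above is driven entirely by `config`. A sketch of the keys this commit touches, with purely illustrative values:

```python
# Keys taken from the code in this commit; the values shown are illustrative
# assumptions, not the repository's actual defaults.
config = {
    "search_type": "similarity_score_threshold",  # or "similarity"
    "num_return_documents": 30,                   # top-k passed to the retriever
    "chunk_size": 1000,                           # used by DataLoader
    "temperature": 0.95,                          # reset to this default in exp_2
    "persist_dir": "./data/chroma_db/",           # ChromaDB path (assumed)
}
```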

backend/modules/vector_store_utils.py

Lines changed: 17 additions & 4 deletions
@@ -14,11 +14,20 @@ class DataLoader:
     """
     Description: Used to chunk data
     """
-    def __init__(self, metadata_df: pd.DataFrame, page_content_column: str, chunk_size:int = 1000, chunk_overlap:int = 150):
+
+    def __init__(
+        self,
+        metadata_df: pd.DataFrame,
+        page_content_column: str,
+        chunk_size: int = 1000,
+        chunk_overlap: int = 150,
+    ):
         self.metadata_df = metadata_df
         self.page_content_column = page_content_column
         self.chunk_size = chunk_size
-        self.chunk_overlap = chunk_overlap if self.chunk_size > chunk_overlap else self.chunk_size
+        self.chunk_overlap = (
+            chunk_overlap if self.chunk_size > chunk_overlap else self.chunk_size
+        )

     def load_and_process_data(self) -> list:
         """
@@ -41,6 +50,7 @@ class DocumentProcessor:
     """
     Description: Used to generate unique documents based on text content to prevent duplicates during embedding
     """
+
     @staticmethod
     def generate_unique_documents(documents: list, db: Chroma) -> tuple:
         """
@@ -74,6 +84,7 @@ class VectorStoreManager:
     """
     Description: Manages the Vector store (chromadb) and takes care of data ingestion, loading the embedding model and embedding the data before adding it to the vector store
     """
+
     def __init__(self, chroma_client: ClientAPI, config: dict):
         self.chroma_client = chroma_client
         self.config = config
@@ -145,7 +156,7 @@ def load_vector_store(
         )

     @staticmethod
-    def add_documents_to_db(db, unique_docs, unique_ids, bs = 512):
+    def add_documents_to_db(db, unique_docs, unique_ids, bs=512):
         """
         Description: Add documents to Chroma DB in batches of bs
         """
@@ -170,7 +181,9 @@ def create_vector_store(self, metadata_df: pd.DataFrame) -> Chroma:
         )

         data_loader = DataLoader(
-            metadata_df, page_content_column="Combined_information", chunk_size = self.config["chunk_size"]
+            metadata_df,
+            page_content_column="Combined_information",
+            chunk_size=self.config["chunk_size"],
         )
         documents = data_loader.load_and_process_data()

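The diff only shows the reformatted signature of `add_documents_to_db`; as a reference point, here is a minimal sketch of the batched insert its docstring describes (the actual method body in `VectorStoreManager` may differ):

```python
# Minimal sketch, not the repository's implementation: add documents to a
# LangChain Chroma store in batches of `bs`, as the docstring describes.
from tqdm import tqdm


def add_documents_to_db(db, unique_docs, unique_ids, bs=512):
    """Add documents to Chroma DB in batches of bs."""
    for start in tqdm(range(0, len(unique_docs), bs)):
        db.add_documents(
            documents=unique_docs[start : start + bs],
            ids=unique_ids[start : start + bs],
        )
```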
5 binary files changed (contents not shown).

docs/evaluation/evaluation.md

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@

 - It is "pretty easy" to add a new evaluation.
 - (Note that `training_utils.py` already overloads some classes from the original training. Which means that you can modify this to your hearts content without affecting the main code. Enjoy~)
-  - Step 1: Find the method you want to override and overload the class/method in `training_utils.py`.
+  - Step 1: Find the method you want to override and overload the class/method in `experiments.py`.
   - Step 2: Add some if statements in `class ExperimentRunner` to ensure you dont break everything.
   - Step 3: Follow the ExperimentRunner templates in `run_all_training.py` to add whatever you added in Step 2 as a new experiment.
     - Give it a custom name so it is easy to understand what happens

evaluation/README.md

Lines changed: 29 additions & 4 deletions
@@ -1,7 +1,32 @@
 # Evaluation of LLM models and techniques

 ## How to run
-- Start the language server at the root of this repository with `./start_llm_service.sh`
-- Run `python run_all_training.py` to train all models (get data, create vector store for each etc)
-- Run `python evaluate.py` to run all evaluations
-- Results are found in in `./evaluation_results.csv`
+- Start the language server at the root of this repository with `./start_llm_service.sh` . This is important, do not skip it.
+- Run `python run_all_training.py` to train all models (get data, create vector store for each etc) and run the models on all possible versions of the queries.
+  - Query templates are in `data/evaluation/query_templates.txt`. Add to this if you want different types of queries.
+- Run `python evaluate.py` to aggregate the results from the previous query. (This does not run the models on the queries)
+- Results are found in in `./evaluation_results.csv` and `evaluation_results.png`
+- **Important note** : If you want to re-run some experiments because things have changed and if the models that you use are the same but the data/labels are new.
+  - Go to `/data/evaluation/{rag-model}/{llm-model}` and remove/move all the folders under it **except** `chroma_db`. If new data is added, the training loop will take care of adding them to the vector database. But if you remove this, it will take a lot longer for the data to be embedded from scratch.
+
+## How to add a new evaluation
+
+- It is "pretty easy" to add a new evaluation.
+- (Note that `training_utils.py` already overloads some classes from the original training. Which means that you can modify this to your hearts content without affecting the main code. Enjoy~)
+  - Step 1: Find the method you want to override and overload the class/method in `experiments.py`.
+  - Step 2: Add some if statements in `class ExperimentRunner` to ensure you dont break everything.
+  - Step 3: Follow the ExperimentRunner templates in `run_all_training.py` to add whatever you added in Step 2 as a new experiment.
+    - Give it a custom name so it is easy to understand what happens
+    - Do not worry, the experiments are cached and won't run again if you have run them before.
+  - Step 4: If you changed something from config, make sure you reset it. Since the file runs in one go, it will affect the following experiments otherwise.
+
+## How to add a new metric
+
+- In `evaluation_utils.py`, go to `class EvaluationProcessor`, add a new function that calculates your metric. (You can use the templates provided)
+- Update the metric in `self.metric_methods`
+- While running the evaluation, add them to your metrics list :
+```python
+metrics = ["precision", "recall", "map"]
+eval_path = Path("../data/evaluation/")
+processor = EvaluationProcessor(eval_path, sort_by=None, metrics=metrics)
+```
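As an illustration of the "add a new metric" recipe in the README above, a hedged sketch of a hypothetical `add_mrr` method. Only the `metric_methods` registry and the comma-joined `y_true`/`y_pred` columns are visible in this commit; the exact helper signature is an assumption.

```python
# Hypothetical metric, mirroring the pattern suggested by self.metric_methods in
# evaluation_utils.py; the signature and column handling are assumptions.
def add_mrr(self, grouped_df):
    """Add mean reciprocal rank per row, assuming comma-joined y_true/y_pred columns."""

    def mrr(row):
        true_ids = set(row["y_true"].split(","))
        for rank, pred in enumerate(row["y_pred"].split(","), start=1):
            if pred in true_ids:
                return 1.0 / rank
        return 0.0

    grouped_df["mrr"] = grouped_df.apply(mrr, axis=1)
    return grouped_df


# ...then register it so it can be requested by name:
# self.metric_methods["mrr"] = self.add_mrr
# metrics = ["precision", "recall", "map", "mrr"]
```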

evaluation/evaluate.py

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@
 import pandas as pd
 from evaluation_utils import EvaluationProcessor
 from tqdm import tqdm
+
 metrics = ["precision", "recall", "map"]
 eval_path = Path("../data/evaluation/")
 processor = EvaluationProcessor(eval_path, sort_by=None, metrics=metrics)

evaluation/evaluation_utils.py

Lines changed: 6 additions & 5 deletions
@@ -9,9 +9,8 @@ class EvaluationProcessor:
     """
     Description: Process all the evaluated results, add the required metrics and save results as a csv/generate plots
     """
-    def __init__(
-        self, eval_path, metrics=None, sort_by="precision"
-    ):
+
+    def __init__(self, eval_path, metrics=None, sort_by="precision"):
         if metrics is None:
             metrics = ["precision", "recall", "map"]
         self.eval_path = eval_path
@@ -25,7 +24,7 @@ def __init__(
         self.metric_methods = {
             "precision": self.add_precision,
             "recall": self.add_recall,
-            "map": self.add_map
+            "map": self.add_map,
         }

     def run(self):
@@ -72,7 +71,9 @@ def generate_results(self, csv_files):
             ]
         ).agg({"y_true": ",".join, "y_pred": ",".join})

-        grouped_results_for_y_true_and_pred = self.add_metrics(grouped_results_for_y_true_and_pred)
+        grouped_results_for_y_true_and_pred = self.add_metrics(
+            grouped_results_for_y_true_and_pred
+        )

         # aggregate by computing the average of the metrics for each group
         grouped_results_for_y_true_and_pred = (

evaluation/experiments.py

Lines changed: 20 additions & 8 deletions
@@ -1,8 +1,8 @@
-
 from training_utils import *
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm.auto import tqdm

+
 def exp_0(process_query_elastic_search, eval_path, query_key_dict):
     """
     EXPERIMENT 0
@@ -21,17 +21,28 @@ def exp_0(process_query_elastic_search, eval_path, query_key_dict):
     # Use ThreadPoolExecutor to parallelize requests
     with ThreadPoolExecutor(max_workers=10) as executor:
         # Start a future for each query
-        futures = {executor.submit(process_query_elastic_search, query, dataset_id): query for query, dataset_id
-                   in
-                   query_key_dict.items()}
+        futures = {
+            executor.submit(
+                process_query_elastic_search, query, dataset_id
+            ): query
+            for query, dataset_id in query_key_dict.items()
+        }

         for future in tqdm(as_completed(futures), total=len(futures)):
             result = future.result()
             # Save the results to a CSV file
             for id, query in result:
                 f.write(f"{id},None,{query},es,es,None\n")

-def exp_1(eval_path, config, list_of_embedding_models, list_of_llm_models, subset_ids, query_key_dict):
+
+def exp_1(
+    eval_path,
+    config,
+    list_of_embedding_models,
+    list_of_llm_models,
+    subset_ids,
+    query_key_dict,
+):
     """
     EXPERIMENT 1
     Main evaluation loop that is used to run the base experiments using different models and embeddings.
@@ -57,6 +68,7 @@ def exp_1(eval_path, config, list_of_embedding_models, list_of_llm_models, subse
     )
     expRunner.run_experiments()

+
 def exp_2(eval_path, config, subset_ids, query_key_dict):
     """
     EXPERIMENT 2
@@ -91,6 +103,7 @@ def exp_2(eval_path, config, subset_ids, query_key_dict):
     # reset the temperature to the default value
     config["temperature"] = 0.95

+
 def exp_3(eval_path, config, subset_ids, query_key_dict):
     """
     EXPERIMENT 3
@@ -104,7 +117,6 @@ def exp_3(eval_path, config, subset_ids, query_key_dict):
     types_of_llm_apply : llm applied as reranker after the RAG pipeline
     """

-
     list_of_embedding_models = [
         "BAAI/bge-large-en-v1.5",
     ]
@@ -130,6 +142,7 @@ def exp_3(eval_path, config, subset_ids, query_key_dict):
     # reset the search type to the default value
     config["search_type"] = "similarity"

+
 def exp_4(eval_path, config, subset_ids, query_key_dict):
     """
     EXPERIMENT 4
@@ -143,7 +156,6 @@ def exp_4(eval_path, config, subset_ids, query_key_dict):
     types_of_llm_apply : llm applied as reranker after the RAG pipeline
     """

-
     list_of_embedding_models = [
         "BAAI/bge-large-en-v1.5",
     ]
@@ -166,4 +178,4 @@ def exp_4(eval_path, config, subset_ids, query_key_dict):
     expRunner.run_experiments()

     # reset the search type to the default value
-    config["chunk_size"] = 1000
+    config["chunk_size"] = 1000
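`exp_0` expects `process_query_elastic_search` to return `(id, query)` pairs that get written to the CSV. That helper is defined elsewhere in the repository; a hypothetical sketch consistent with the Elasticsearch snippet further down in this commit:

```python
# Hypothetical stand-in for the helper exp_0 receives; the real one is defined
# elsewhere (e.g. training_utils.py) and may use dataset_id, which is unused here.
import requests


def process_query_elastic_search(query, dataset_id):
    # query OpenML's Elasticsearch index and return (hit_id, query) pairs,
    # matching the `for id, query in result:` loop in exp_0
    response = requests.get("https://es.openml.org/_search", params={"q": query})
    hits = response.json()["hits"]["hits"]
    return [(hit["_id"], query) for hit in hits]
```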
Lines changed: 12 additions & 8 deletions
@@ -1,13 +1,17 @@
-#%%
+# %%
 import requests
-#%%
+
+
+# %%
 def get_elastic_search_results(query):
-    query = query.replace(' ', '%20')
-    url = 'https://es.openml.org/_search?q=' + query
+    query = query.replace(" ", "%20")
+    url = "https://es.openml.org/_search?q=" + query
     response = requests.get(url)
     response_json = response.json()
-    return response_json['hits']['hits']
-#%%
-res = get_elastic_search_results('iris')
+    return response_json["hits"]["hits"]
+
+
+# %%
+res = get_elastic_search_results("iris")
 # %%
-ids = [val['_id'] for val in res]
+ids = [val["_id"] for val in res]

evaluation/run_all_training.py

Lines changed: 12 additions & 3 deletions
@@ -31,7 +31,7 @@
     "BAAI/bge-large-en-v1.5",
     "BAAI/bge-base-en-v1.5",
     "Snowflake/snowflake-arctic-embed-l",
-    "Alibaba-NLP/gte-large-en-v1.5"
+    "Alibaba-NLP/gte-large-en-v1.5",
 ]
 list_of_llm_models = ["llama3"]

@@ -72,7 +72,9 @@
 subset_ids = list(set([int(item) for sublist in subset_ids for item in sublist]))
 # %%
 # get the queries for the datasets
-query_key_dict = get_queries(query_templates=query_templates, load_eval_queries=load_eval_queries)
+query_key_dict = get_queries(
+    query_templates=query_templates, load_eval_queries=load_eval_queries
+)
 json.dump(query_key_dict, open(eval_path / "query_key_dict.json", "w"))

 # Run experiments on just queries and not filters
@@ -81,7 +83,14 @@
 exp_0(process_query_elastic_search, eval_path, query_key_dict)

 # Experiment 1 : Run the base experiments using different models and embeddings
-exp_1(eval_path, config, list_of_embedding_models, list_of_llm_models, subset_ids, query_key_dict)
+exp_1(
+    eval_path,
+    config,
+    list_of_embedding_models,
+    list_of_llm_models,
+    subset_ids,
+    query_key_dict,
+)

 # Experiment 2 : Evaluating temperature = 1 (default was 0.95)
 exp_2(eval_path, config, subset_ids, query_key_dict)
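`query_key_dict` maps each generated query string to the dataset id it was built from (it is iterated as `(query, dataset_id)` pairs in `exp_0` and serialized to `query_key_dict.json` above). An illustrative example of its shape, with made-up entries:

```python
# Illustrative shape only -- real entries come from get_queries() and the
# templates in data/evaluation/query_templates.txt; queries and ids are made up.
query_key_dict = {
    "Find me a dataset about mushrooms": "24",
    "Find me a dataset about credit risk": "31",
}
```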
