-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_loader.py
46 lines (42 loc) · 1.84 KB
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import logging
from typing import List

import pandas as pd
from langchain_core.documents import Document
class DataLoader:
    """Load game CSV data and convert its rows into LangChain Documents.

    Both methods are static; the class only groups them under one name.
    """

    @staticmethod
    def load_data(filepath: str) -> pd.DataFrame:
        """Load and preprocess game data from a CSV file.

        The ``current_players`` and ``peak_players_today`` columns are
        converted to numbers (values that cannot be parsed become NaN) and
        the ``date`` column is parsed into datetimes.

        Args:
            filepath: Location of the CSV file, passed straight to
                ``pandas.read_csv``.

        Returns:
            The loaded DataFrame with the three columns above converted.

        Raises:
            ValueError: If the file cannot be read, a required column is
                missing, or a date cannot be parsed. The underlying
                exception is attached as ``__cause__``.
        """
        numeric_columns = ("current_players", "peak_players_today")
        try:
            data = pd.read_csv(filepath)

            # Fail with a readable message instead of a bare KeyError repr.
            missing = [
                column
                for column in (*numeric_columns, "date")
                if column not in data.columns
            ]
            if missing:
                raise ValueError(
                    "missing required column(s): " + ", ".join(missing)
                )

            for column in numeric_columns:
                data[column] = pd.to_numeric(data[column], errors="coerce")
            data["date"] = pd.to_datetime(data["date"])
        except Exception as e:
            # Callers rely on ValueError for every kind of load failure, so
            # the broad catch is kept; chaining preserves the real cause.
            raise ValueError(f"Error loading data: {e}") from e
        return data

    @staticmethod
    def create_documents(data: pd.DataFrame) -> List[Document]:
        """Convert data rows into LangChain Document format.

        Each row becomes one Document whose ``page_content`` is a one-line
        summary of the row and whose ``metadata`` carries the ``name``,
        ``date`` (ISO format), ``price``, ``current_players`` and
        ``peak_players_today`` values.

        Conversion is best-effort: a row that raises while being converted
        (for example because a column is missing or its date value cannot
        be formatted) is logged as a warning and skipped.

        Args:
            data: DataFrame holding ``name``, ``price``, ``current_players``,
                ``peak_players_today`` and ``date`` columns, where ``date``
                values support ``strftime`` and ``isoformat``.

        Returns:
            The Documents built from the rows that converted successfully,
            in row order.
        """
        logger = logging.getLogger(__name__)
        documents = []
        for index, row in data.iterrows():
            try:
                date = row["date"]
                content = (
                    f"Game: {row['name']}, Price: {row['price']}, "
                    f"Current Players: {row['current_players']}, "
                    f"Peak Players Today: {row['peak_players_today']}, "
                    f"Date: {date.strftime('%Y-%m-%d')}"
                )
                metadata = {
                    "name": row["name"],
                    "date": date.isoformat(),
                    "price": row["price"],
                    "current_players": row["current_players"],
                    "peak_players_today": row["peak_players_today"],
                }
                documents.append(
                    Document(page_content=content, metadata=metadata)
                )
            except Exception as e:
                # Deliberately broad: one bad row must not abort the batch.
                logger.warning(
                    "Skipping row %s due to error: %s", index, e
                )
        return documents