Skip to content

Commit

Permalink
Fix bug in codeinterpreter
Browse files (browse the repository at this point in the history)
  • Loading branch information
homanp committed Mar 4, 2024
1 parent 08d9996 commit bee8e75
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 9 deletions.
4 changes: 4 additions & 0 deletions service/code_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,4 +166,8 @@ async def run_python(self, code: str):
f"python {codefile_path}",
)

if process.messages:
line = process.messages[0].line
return line

return process
8 changes: 3 additions & 5 deletions service/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@ def __init__(
)

def _get_strategy(self, type: str) -> Optional[str]:
strategies = {
"PDF": "auto",
}
strategies = {"PDF": "auto", "CSV": "auto"}
try:
return strategies[type]
except KeyError:
Expand All @@ -80,9 +78,10 @@ async def _partition_file(
# strategy = "auto"

logger.info(
f"Downloading and extracting elements from {file.url},"
f"Downloading and extracting elements from {file.url}, "
f"using `{strategy}` strategy"
)
print(file.suffix)
with NamedTemporaryFile(suffix=file.suffix, delete=True) as temp_file:
with requests.get(url=file.url) as response:
temp_file.write(response.content)
Expand Down Expand Up @@ -173,7 +172,6 @@ async def generate_chunks(
),
}
chunks.append(chunk_data)

if config.splitter.name == "semantic":
elements = await self._partition_file(
file,
Expand Down
1 change: 1 addition & 0 deletions service/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ async def handle_urls(
):
embedding_service.files = files
chunks = await embedding_service.generate_chunks(config=config)
print(chunks)
summary_documents = await embedding_service.generate_summary_documents(
documents=chunks
)
Expand Down
11 changes: 7 additions & 4 deletions service/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@
from utils.summarise import SUMMARY_SUFFIX
from vectordbs import BaseVectorDatabase, get_vector_service

STRUTURED_DATA = [".xlsx", ".csv", ".json"]
STRUCTURED_DATA = [
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"text/csv",
"application/json",
]


def create_route_layer() -> RouteLayer:
Expand Down Expand Up @@ -40,15 +44,14 @@ async def get_documents(
if not len(chunks):
logger.error(f"No documents found for query: {payload.input}")
return []
is_structured = chunks[0].metadata.get("document_type") in STRUTURED_DATA
is_structured = chunks[0].metadata.get("filetype") in STRUCTURED_DATA
reranked_chunks = []
if is_structured and payload.interpreter_mode:
async with CodeInterpreterService(
session_id=payload.session_id, file_urls=[chunks[0].metadata.get("doc_url")]
) as service:
code = await service.generate_code(query=payload.input)
response = await service.run_python(code=code)
output = response.stdout
output = await service.run_python(code=code)
reranked_chunks.append(
BaseDocumentChunk(
id=str(uuid4()),
Expand Down

0 comments on commit bee8e75

Please sign in to comment.