Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions client/src/pages/ProjectPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,9 @@ export const ProjectPage = () => {
if (res.valid_filenames?.length) {
toast.success(`${res.valid_filenames.length} file(s) uploaded`);
}
if ((res.empty_abstract_count ?? 0) > 0) {
toast.warn(`${res.empty_abstract_count} abstracts are empty - results will not be optimal`, { autoClose: 8000 })
}
if (res.errors?.length) {
ExpandableToast(res.errors);
console.error("File upload errors:", res.errors);
Expand Down
2 changes: 1 addition & 1 deletion server/src/schemas/file_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class FileError(BaseModel):
class ProcessedFiles(BaseModel):
valid_filenames: List[str]
errors: List[FileError]

empty_abstract_count: int = 0

class UploadedFilePaper(BaseModel):
title: str
Expand Down
4 changes: 2 additions & 2 deletions server/src/schemas/publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

class PublicationRowData(BaseModel):
title: str
abstract: str
abstract: Optional[str]
doi: Optional[str]

@field_validator("title", "abstract")
@field_validator("title")
@classmethod
def check_not_empty(cls, v, field):
if not isinstance(v, str) or not str(v).strip():
Expand Down
17 changes: 15 additions & 2 deletions server/src/services/file_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,25 @@ async def process_files(
"""
errors: List[FileError] = []
valid_filenames: List[str] = []
empty_abstract_count_total = 0

for f in files:
validation_errors = validate_csv(f.file, f.filename or "NONE")
validation_errors, file_empty_abstracts = validate_csv(
f.file, f.filename or "NONE"
)
if validation_errors:
errors.extend(validation_errors)
continue

if f.filename is None:
continue
if f.file is None:
continue
if f.content_type is None:
continue

empty_abstract_count_total += file_empty_abstracts

try:
file_data = FileCreate(
project_uuid=project_uuid,
Expand Down Expand Up @@ -91,6 +98,8 @@ async def process_files(
}
if pd.isna(normalized.get("doi")):
normalized["doi"] = None
if pd.isna(normalized.get("abstract")):
normalized["abstract"] = None

papers.append(
PaperCreate(
Expand Down Expand Up @@ -127,7 +136,11 @@ async def process_files(
except Exception as e:
raise e

return ProcessedFiles(valid_filenames=valid_filenames, errors=errors)
return ProcessedFiles(
valid_filenames=valid_filenames,
errors=errors,
empty_abstract_count=empty_abstract_count_total,
)


def create_file_service(db_ctx: DBContext) -> FileService:
Expand Down
10 changes: 6 additions & 4 deletions server/src/tests/test_010_unit.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import io

import pytest

from src.schemas.file_service import FileError
from src.tools.csv_file_validation import validate_csv

Expand All @@ -8,15 +10,15 @@
def test_validate_csv_success():
csv_content = "title,abstract,doi\nTest Title,Test Abstract,10.1234/test"
file_obj = io.BytesIO(csv_content.encode("utf-8"))
errors = validate_csv(file_obj, "test.csv")
errors, _ = validate_csv(file_obj, "test.csv")
assert errors == []


@pytest.mark.unit
def test_validate_csv_missing_title_field():
csv_content = "abstract,doi\nTest Abstract,10.1234/test"
file_obj = io.BytesIO(csv_content.encode("utf-8"))
errors = validate_csv(file_obj, "test.csv")
errors, _ = validate_csv(file_obj, "test.csv")
assert errors == [
FileError(
file="test.csv",
Expand All @@ -30,7 +32,7 @@ def test_validate_csv_missing_title_field():
def test_validate_csv_missing_abstract_field():
csv_content = "title,doi\nTest Title,10.1234/test"
file_obj = io.BytesIO(csv_content.encode("utf-8"))
errors = validate_csv(file_obj, "test.csv")
errors, _ = validate_csv(file_obj, "test.csv")
assert errors == [
FileError(
file="test.csv",
Expand All @@ -44,7 +46,7 @@ def test_validate_csv_missing_abstract_field():
def test_validate_csv_missing_doi_field():
csv_content = "title,abstract\nTest Title,Test Abstract"
file_obj = io.BytesIO(csv_content.encode("utf-8"))
errors = validate_csv(file_obj, "test.csv")
errors, _ = validate_csv(file_obj, "test.csv")
assert errors == [
FileError(
file="test.csv",
Expand Down
38 changes: 25 additions & 13 deletions server/src/tools/csv_file_validation.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,52 @@
import io
from typing import BinaryIO, List

import pandas as pd
from pydantic import ValidationError
from src.schemas.publication import PublicationRowData

from src.schemas.file_service import FileError
from src.schemas.publication import PublicationRowData

REQUIRED_FIELDS = {"title", "abstract", "doi"}


def validate_csv(file_obj: BinaryIO, filename: str) -> List[FileError]:
def validate_csv(file_obj: BinaryIO, filename: str) -> tuple[List[FileError], int]:
errors: List[FileError] = []
empty_abstract_count = 0
try:
raw = file_obj.read()
df = pd.read_csv(io.BytesIO(raw), encoding="utf-8-sig")
df.columns = [str(c).strip().lower() for c in df.columns]
missing = REQUIRED_FIELDS - set(df.columns)
if missing:
return [
FileError(
**{
"file": filename or "NO_FILENAME",
"row": "header",
"message": f"Missing required columns: {', '.join(missing)}",
}
)
]
return (
[
FileError(
**{
"file": filename or "NO_FILENAME",
"row": "header",
"message": f"Missing required columns: {', '.join(missing)}",
}
)
],
0,
)

df["doi"] = df["doi"].astype(str)
for idx, row in df.iterrows():
if pd.isna(row.get("abstract")):
empty_abstract_count += 1
row["abstract"] = None

if pd.isna(row.get("doi")):
row["doi"] = None

try:
PublicationRowData(**row.to_dict())
except ValidationError as e:
for err in e.errors():
errors.append(
FileError(file=filename, row=int(idx), message=err["msg"])
FileError(file=filename, row=str(idx), message=err["msg"])
) # type: ignore
except pd.errors.ParserError as e:
errors.append(
Expand All @@ -51,4 +63,4 @@ def validate_csv(file_obj: BinaryIO, filename: str) -> List[FileError]:
file_obj.seek(0)
except Exception:
pass
return errors
return errors, empty_abstract_count