From 8de80646f2e40a20480e436019ffc102ecfe4c8d Mon Sep 17 00:00:00 2001 From: simo <49877847+saimouu@users.noreply.github.com> Date: Thu, 19 Feb 2026 15:11:56 +0200 Subject: [PATCH 1/2] Fix empty abstracts preventing file input - Change validation logic to allow empty abstracts - Empty abstracts default to NO_ABSTRACT - Empty abstract count is returned on file upload - Warning toast is displayed in the frontend if empty abstracts were detected --- client/src/pages/ProjectPage.tsx | 3 ++ server/src/schemas/file_service.py | 2 +- server/src/schemas/publication.py | 4 +-- server/src/services/file_service.py | 17 +++++++++-- server/src/tools/csv_file_validation.py | 38 ++++++++++++++++--------- 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/client/src/pages/ProjectPage.tsx b/client/src/pages/ProjectPage.tsx index 5e7bb27..2266a8f 100644 --- a/client/src/pages/ProjectPage.tsx +++ b/client/src/pages/ProjectPage.tsx @@ -632,6 +632,9 @@ export const ProjectPage = () => { if (res.valid_filenames?.length) { toast.success(`${res.valid_filenames.length} file(s) uploaded`); } + if ((res.empty_abstract_count ?? 
0) > 0) { + toast.warn(`${res.empty_abstract_count} abstracts are empty - results will not be optimal`, { autoClose: 8000 }) + } if (res.errors?.length) { ExpandableToast(res.errors); console.error("File upload errors:", res.errors); diff --git a/server/src/schemas/file_service.py b/server/src/schemas/file_service.py index ec84f8a..f449e5d 100644 --- a/server/src/schemas/file_service.py +++ b/server/src/schemas/file_service.py @@ -11,7 +11,7 @@ class FileError(BaseModel): class ProcessedFiles(BaseModel): valid_filenames: List[str] errors: List[FileError] - + empty_abstract_count: int = 0 class UploadedFilePaper(BaseModel): title: str diff --git a/server/src/schemas/publication.py b/server/src/schemas/publication.py index 85b216b..699ae6b 100644 --- a/server/src/schemas/publication.py +++ b/server/src/schemas/publication.py @@ -4,10 +4,10 @@ class PublicationRowData(BaseModel): title: str - abstract: str + abstract: Optional[str] doi: Optional[str] - @field_validator("title", "abstract") + @field_validator("title") @classmethod def check_not_empty(cls, v, field): if not isinstance(v, str) or not str(v).strip(): diff --git a/server/src/services/file_service.py b/server/src/services/file_service.py index 69aefea..34e1bc0 100644 --- a/server/src/services/file_service.py +++ b/server/src/services/file_service.py @@ -48,18 +48,25 @@ async def process_files( """ errors: List[FileError] = [] valid_filenames: List[str] = [] + empty_abstract_count_total = 0 for f in files: - validation_errors = validate_csv(f.file, f.filename or "NONE") + validation_errors, file_empty_abstracts = validate_csv( + f.file, f.filename or "NONE" + ) if validation_errors: errors.extend(validation_errors) continue + if f.filename is None: continue if f.file is None: continue if f.content_type is None: continue + + empty_abstract_count_total += file_empty_abstracts + try: file_data = FileCreate( project_uuid=project_uuid, @@ -91,6 +98,8 @@ async def process_files( } if 
pd.isna(normalized.get("doi")): normalized["doi"] = None + if pd.isna(normalized.get("abstract")): + normalized["abstract"] = None papers.append( PaperCreate( @@ -127,7 +136,11 @@ async def process_files( except Exception as e: raise e - return ProcessedFiles(valid_filenames=valid_filenames, errors=errors) + return ProcessedFiles( + valid_filenames=valid_filenames, + errors=errors, + empty_abstract_count=empty_abstract_count_total, + ) def create_file_service(db_ctx: DBContext) -> FileService: diff --git a/server/src/tools/csv_file_validation.py b/server/src/tools/csv_file_validation.py index a40da14..650bd3c 100644 --- a/server/src/tools/csv_file_validation.py +++ b/server/src/tools/csv_file_validation.py @@ -1,40 +1,52 @@ import io from typing import BinaryIO, List + import pandas as pd from pydantic import ValidationError -from src.schemas.publication import PublicationRowData + from src.schemas.file_service import FileError +from src.schemas.publication import PublicationRowData REQUIRED_FIELDS = {"title", "abstract", "doi"} -def validate_csv(file_obj: BinaryIO, filename: str) -> List[FileError]: +def validate_csv(file_obj: BinaryIO, filename: str) -> tuple[List[FileError], int]: errors: List[FileError] = [] + empty_abstract_count = 0 try: raw = file_obj.read() df = pd.read_csv(io.BytesIO(raw), encoding="utf-8-sig") df.columns = [str(c).strip().lower() for c in df.columns] missing = REQUIRED_FIELDS - set(df.columns) if missing: - return [ - FileError( - **{ - "file": filename or "NO_FILENAME", - "row": "header", - "message": f"Missing required columns: {', '.join(missing)}", - } - ) - ] + return ( + [ + FileError( + **{ + "file": filename or "NO_FILENAME", + "row": "header", + "message": f"Missing required columns: {', '.join(missing)}", + } + ) + ], + 0, + ) + df["doi"] = df["doi"].astype(str) for idx, row in df.iterrows(): + if pd.isna(row.get("abstract")): + empty_abstract_count += 1 + row["abstract"] = None + if pd.isna(row.get("doi")): row["doi"] = None + 
try: PublicationRowData(**row.to_dict()) except ValidationError as e: for err in e.errors(): errors.append( - FileError(file=filename, row=int(idx), message=err["msg"]) + FileError(file=filename, row=str(idx), message=err["msg"]) ) # type: ignore except pd.errors.ParserError as e: errors.append( @@ -51,4 +63,4 @@ def validate_csv(file_obj: BinaryIO, filename: str) -> List[FileError]: file_obj.seek(0) except Exception: pass - return errors + return errors, empty_abstract_count From d6f05371401a3dcfe83211906a08a252fd9ace9f Mon Sep 17 00:00:00 2001 From: simo <49877847+saimouu@users.noreply.github.com> Date: Thu, 19 Feb 2026 15:27:02 +0200 Subject: [PATCH 2/2] Fix validation tests --- server/src/tests/test_010_unit.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/server/src/tests/test_010_unit.py b/server/src/tests/test_010_unit.py index 654d486..99b5422 100644 --- a/server/src/tests/test_010_unit.py +++ b/server/src/tests/test_010_unit.py @@ -1,5 +1,7 @@ import io + import pytest + from src.schemas.file_service import FileError from src.tools.csv_file_validation import validate_csv @@ -8,7 +10,7 @@ def test_validate_csv_success(): csv_content = "title,abstract,doi\nTest Title,Test Abstract,10.1234/test" file_obj = io.BytesIO(csv_content.encode("utf-8")) - errors = validate_csv(file_obj, "test.csv") + errors, _ = validate_csv(file_obj, "test.csv") assert errors == [] @@ -16,7 +18,7 @@ def test_validate_csv_success(): def test_validate_csv_missing_title_field(): csv_content = "abstract,doi\nTest Abstract,10.1234/test" file_obj = io.BytesIO(csv_content.encode("utf-8")) - errors = validate_csv(file_obj, "test.csv") + errors, _ = validate_csv(file_obj, "test.csv") assert errors == [ FileError( file="test.csv", @@ -30,7 +32,7 @@ def test_validate_csv_missing_title_field(): def test_validate_csv_missing_abstract_field(): csv_content = "title,doi\nTest Title,10.1234/test" file_obj = io.BytesIO(csv_content.encode("utf-8")) - errors = 
validate_csv(file_obj, "test.csv") + errors, _ = validate_csv(file_obj, "test.csv") assert errors == [ FileError( file="test.csv", @@ -44,7 +46,7 @@ def test_validate_csv_missing_abstract_field(): def test_validate_csv_missing_doi_field(): csv_content = "title,abstract\nTest Title,Test Abstract" file_obj = io.BytesIO(csv_content.encode("utf-8")) - errors = validate_csv(file_obj, "test.csv") + errors, _ = validate_csv(file_obj, "test.csv") assert errors == [ FileError( file="test.csv",