Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 17 additions & 10 deletions src/croissant_baker/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Command-line interface for Croissant Baker."""

import csv
import typer
from pathlib import Path
import importlib.metadata
Expand Down Expand Up @@ -163,21 +164,27 @@ def main(
parsed_creators = []
if creator:
for creator_info in creator:
# Parse format: "Name[,Email[,URL]]"
parts = [part.strip() for part in creator_info.split(",")]
creator_info = creator_info.strip()

if not parts[0]: # Empty name
# Preferred: semicolon
if ";" in creator_info:
creator_parts = [p.strip() for p in creator_info.split(";")]

else:
# Use CSV parsing for comma cases (handles quotes properly)
creator_parts = next(csv.reader([creator_info]))
creator_parts = [p.strip() for p in creator_parts]

if not creator_parts or not creator_parts[0]:
continue

creator_obj = {"name": parts[0]} # Name is required
creator_obj = {"name": creator_parts[0]}

# Add optional email if provided and not empty
if len(parts) > 1 and parts[1]:
creator_obj["email"] = parts[1]
if len(creator_parts) > 1 and creator_parts[1]:
creator_obj["email"] = creator_parts[1]

# Add optional URL if provided and not empty
if len(parts) > 2 and parts[2]:
creator_obj["url"] = parts[2]
if len(creator_parts) > 2 and creator_parts[2]:
creator_obj["url"] = creator_parts[2]

parsed_creators.append(creator_obj)

Expand Down
54 changes: 54 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,60 @@ def test_missing_creator_required(csv_dataset: Path, tmp_path: Path) -> None:
assert "Example:" in result.stderr


def test_creator_parsing_variants(csv_dataset: Path, tmp_path: Path) -> None:
    """Test creator parsing for comma, quoted, and semicolon formats.

    Each case invokes the CLI once with a single ``--creator`` value and
    checks that the command exits with code 0 and that every expected
    field value appears verbatim in the text of the written output file.
    """
    test_cases = [
        # (input, expected_strings)
        # Quoted names that themselves contain commas.
        ('"Google, LLC"', ["Google, LLC"]),
        ('"Google, LLC",info@google.com', ["Google, LLC", "info@google.com"]),
        (
            '"Google, LLC",info@google.com,https://google.com',
            ["Google, LLC", "info@google.com", "https://google.com"],
        ),
        (
            '"Doe, Jr., John",john@example.com',
            ["Doe, Jr., John", "john@example.com"],
        ),
        # Backward compatibility: plain comma-separated input.
        ("Alice Smith", ["Alice Smith"]),
        ("Alice Smith,alice@example.com", ["Alice Smith", "alice@example.com"]),
        (
            "Alice Smith,alice@example.com,https://example.com",
            ["Alice Smith", "alice@example.com", "https://example.com"],
        ),
        # Semicolon format: commas inside a field need no quoting.
        (
            "Google, LLC;info@google.com;https://google.com",
            ["Google, LLC", "info@google.com", "https://google.com"],
        ),
    ]

    for index, (creator_input, expected_values) in enumerate(test_cases):
        # Name each output by its case index rather than hash(creator_input):
        # str hashes are salted per interpreter process, so hash-based names
        # change on every run and are not guaranteed to be unique per case.
        output = tmp_path / f"output_{index}.jsonld"

        result = runner.invoke(
            app,
            [
                "--input",
                str(csv_dataset),
                "--output",
                str(output),
                "--creator",
                creator_input,
            ],
        )

        assert result.exit_code == 0, f"Failed for input: {creator_input}"

        content = output.read_text()

        for expected in expected_values:
            assert expected in content, (
                f"Missing '{expected}' for input: {creator_input}"
            )


def test_invalid_date_format(csv_dataset: Path, tmp_path: Path) -> None:
"""Test that invalid date format gives clear error message."""
output = tmp_path / "output.jsonld"
Expand Down