From d6bffd05515c69fc1bd57e85b91c48264eddf69b Mon Sep 17 00:00:00 2001
From: Alessio <alessio.vertemati@gmail.com>
Date: Tue, 9 Dec 2025 19:17:19 +0100
Subject: [PATCH 1/3] PDF merge command

---
 src/parxy_cli/cli.py          |   2 +
 src/parxy_cli/commands/pdf.py | 281 ++++++++++++++++++++++
 tests/commands/test_pdf.py    | 439 ++++++++++++++++++++++++++++++++++
 3 files changed, 722 insertions(+)
 create mode 100644 src/parxy_cli/commands/pdf.py
 create mode 100644 tests/commands/test_pdf.py

diff --git a/src/parxy_cli/cli.py b/src/parxy_cli/cli.py
index 9b7d429..7bb086d 100644
--- a/src/parxy_cli/cli.py
+++ b/src/parxy_cli/cli.py
@@ -16,6 +16,7 @@
 from parxy_cli.commands.env import app as env_command
 from parxy_cli.commands.version import app as version_command
 from parxy_cli.commands.markdown import app as markdown_command
+from parxy_cli.commands.pdf import app as pdf_command
 
 
 # Create typer app
@@ -71,6 +72,7 @@ def main(
 app.add_typer(env_command)
 app.add_typer(version_command)
 app.add_typer(markdown_command)
+app.add_typer(pdf_command)
 
 
 def main():
diff --git a/src/parxy_cli/commands/pdf.py b/src/parxy_cli/commands/pdf.py
new file mode 100644
index 0000000..49c4a3b
--- /dev/null
+++ b/src/parxy_cli/commands/pdf.py
@@ -0,0 +1,281 @@
+"""PDF manipulation commands."""
+
+import re
+from pathlib import Path
+from typing import List, Annotated, Optional, Tuple
+
+import typer
+import pymupdf
+
+from parxy_cli.console.console import Console
+
+app = typer.Typer()
+
+console = Console()
+
+
+def parse_input_with_pages(
+    input_str: str,
+) -> Tuple[str, Optional[int], Optional[int]]:
+    """
+    Split an input such as ``file.pdf[2:4]`` into its path and page range.
+
+    Supported forms (page numbers are 1-based and inclusive on the CLI):
+    - file.pdf[1] - single page
+    - file.pdf[:2] - from the first page to page 2
+    - file.pdf[3:] - from page 3 to the last page
+    - file.pdf[3:5] - from page 3 to page 5
+    - file.pdf - all pages
+
+    A trailing bracket group that is not a numeric range (for example a
+    folder called ``scans[old]``) is treated as part of the path instead of
+    raising ``ValueError``.
+
+    Args:
+        input_str: Input string with optional page range
+
+    Returns:
+        Tuple of (file_path, from_page, to_page) with 0-based page indexes.
+        Both are None when no range is given; to_page alone is None for an
+        open-ended range such as ``[3:]``.
+    """
+    # Lazy prefix + anchored suffix: only a bracket group at the very end counts
+    pattern = r'^(.+?)\[([^\]]+)\]$'
+    match = re.match(pattern, input_str)
+
+    if not match:
+        return input_str, None, None
+
+    file_path, page_range = match.groups()
+    start_str, sep, end_str = (
+        part.strip() for part in page_range.partition(':')
+    )
+
+    try:
+        if sep:
+            # [start:end] - an empty side means "from the first"/"to the last" page
+            from_page = int(start_str) - 1 if start_str else 0
+            to_page = int(end_str) - 1 if end_str else None
+        else:
+            # [n] - a single page
+            from_page = to_page = int(start_str) - 1
+    except ValueError:
+        # Not a page range after all, so keep the brackets in the path
+        return input_str, None, None
+
+    return file_path, from_page, to_page
+
+
+def collect_pdf_files_with_ranges(
+    inputs: List[str],
+) -> List[Tuple[Path, Optional[int], Optional[int]]]:
+    """
+    Resolve CLI inputs into the ordered list of PDFs (and page ranges) to use.
+
+    Files keep the range parsed by parse_input_with_pages. Folders contribute
+    their direct children only (non-recursive), sorted by path; a range on a
+    folder is ignored with a warning. The ``.pdf`` extension is matched
+    case-insensitively and folder entries that are not files are skipped.
+
+    Args:
+        inputs: List of file paths (with optional page ranges) and/or folder paths
+
+    Returns:
+        List of tuples: (Path, from_page, to_page) where pages are 0-based.
+        from_page and to_page are None if all pages should be included.
+    """
+    files: List[Tuple[Path, Optional[int], Optional[int]]] = []
+
+    for input_str in inputs:
+        file_path_str, from_page, to_page = parse_input_with_pages(input_str)
+        path = Path(file_path_str)
+
+        if path.is_file():
+            if path.suffix.lower() == '.pdf':
+                files.append((path, from_page, to_page))
+            else:
+                console.warning(f'Skipping non-PDF file: {file_path_str}')
+        elif path.is_dir():
+            if from_page is not None or to_page is not None:
+                console.warning(
+                    f'Page ranges are not supported for directories: {input_str}'
+                )
+            # Not glob('*.pdf'): it is case-sensitive on POSIX and matches directories
+            pdf_files = sorted(
+                p for p in path.iterdir() if p.is_file() and p.suffix.lower() == '.pdf'
+            )
+            if pdf_files:
+                files.extend((entry, None, None) for entry in pdf_files)
+            else:
+                console.warning(f'No PDF files found in directory: {file_path_str}')
+        else:
+            console.warning(f'Path not found: {file_path_str}')
+
+    return files
+
+
+@app.command(name='pdf:merge', help='Merge multiple PDF files into a single PDF')
+def merge(
+    inputs: Annotated[
+        List[str],
+        typer.Argument(
+            help='One or more PDF files or folders to merge. Files support page ranges in square brackets (e.g., file.pdf[1:3]). Folders are processed non-recursively.',
+        ),
+    ],
+    output: Annotated[
+        Optional[str],
+        typer.Option(
+            '--output',
+            '-o',
+            help='Output file path for the merged PDF. If not specified, you will be prompted.',
+        ),
+    ] = None,
+):
+    """
+    Merge multiple PDF files into a single PDF.
+
+    Files are merged in the order they are provided. When a folder is specified,
+    PDF files in that folder are included (non-recursively) and sorted alphabetically.
+
+    Page ranges can be specified using square brackets with 1-based indexing:
+    - file.pdf[1] - only page 1
+    - file.pdf[:2] - from first page to page 2 (inclusive)
+    - file.pdf[3:] - from page 3 to the end
+    - file.pdf[3:5] - from page 3 to page 5 (inclusive)
+    - file.pdf - all pages (no brackets)
+
+    Inputs whose page range does not exist in the document are skipped with
+    a warning. A bare output filename (no directory part) is written next to
+    the first input file, and the ``.pdf`` extension is enforced. The command
+    exits with status 1 when fewer than two PDFs are found or a file fails.
+
+    Examples:
+
+        # Merge specific files with output specified
+        parxy pdf:merge file1.pdf file2.pdf -o merged.pdf
+
+        # Merge files - will prompt for output filename
+        parxy pdf:merge file1.pdf file2.pdf
+
+        # Merge with page ranges - take page 1 from file1, pages 2-4 from file2
+        parxy pdf:merge file1.pdf[1] file2.pdf[2:4] -o merged.pdf
+
+        # Merge specific pages from multiple files
+        parxy pdf:merge doc1.pdf[:3] doc2.pdf[5:] doc3.pdf[2] -o combined.pdf
+
+        # Mix full files and page ranges
+        parxy pdf:merge cover.pdf report.pdf[1:10] appendix.pdf -o final.pdf
+
+        # Merge all PDFs in a folder
+        parxy pdf:merge /path/to/folder -o merged.pdf
+
+        # Merge files and folders
+        parxy pdf:merge doc1.pdf /path/to/folder doc2.pdf -o merged.pdf
+    """
+    console.action('Merge PDF files', space_after=False)
+
+    # Collect all PDF files with page ranges
+    files_with_ranges = collect_pdf_files_with_ranges(inputs)
+
+    if not files_with_ranges:
+        console.error('No PDF files found to merge.', panel=True)
+        raise typer.Exit(1)
+
+    if len(files_with_ranges) < 2:
+        console.warning(
+            'Only one PDF file found. At least two files are needed for merging.',
+            panel=True,
+        )
+        raise typer.Exit(1)
+
+    console.info(
+        f'Found {len(files_with_ranges)} PDF file{"s" if len(files_with_ranges) > 1 else ""} to merge'
+    )
+
+    # Handle output path
+    if output is None:
+        output = typer.prompt('Enter output filename or path')
+
+    output_path = Path(output)
+
+    # If only a filename is provided (not an absolute path), use the first input file's directory
+    if not output_path.is_absolute() and output_path.parent == Path('.'):
+        first_file = files_with_ranges[0][0]
+        output_path = first_file.parent / output_path
+
+    # Ensure the output has .pdf extension
+    if output_path.suffix.lower() != '.pdf':
+        output_path = output_path.with_suffix('.pdf')
+
+    # Create output directory if it doesn't exist
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Merge PDFs
+    status = f'Merging {len(files_with_ranges)} PDF files...'
+    try:
+        # Both documents are context-managed so they are closed on every path,
+        # including early exits and failures while saving
+        with console.shimmer(status), pymupdf.open() as merged_pdf:
+            for file_path, from_page, to_page in files_with_ranges:
+                try:
+                    with pymupdf.open(file_path) as pdf:
+                        page_count = len(pdf)
+
+                        if from_page is None and to_page is None:
+                            # No range given: insert the whole document
+                            page_info = 'all pages'
+                            merged_pdf.insert_pdf(pdf)
+                        else:
+                            # insert_pdf takes inclusive, 0-based bounds; an
+                            # open end resolves to the document's last page
+                            first = from_page if from_page is not None else 0
+                            last = to_page if to_page is not None else page_count - 1
+
+                            # Checked in order: start page, end page, then ordering
+                            if not 0 <= first < page_count:
+                                problem = f'page {first + 1} does not exist'
+                            elif not 0 <= last < page_count:
+                                problem = f'page {last + 1} does not exist'
+                            elif first > last:
+                                problem = f'start page {first + 1} > end page {last + 1}'
+                            else:
+                                problem = None
+
+                            if problem is not None:
+                                # A bad range only skips this input
+                                console.warning(
+                                    f'Invalid page range for {file_path.name}: {problem}'
+                                )
+                                continue
+
+                            # Format page info for display (1-based)
+                            if first == last:
+                                page_info = f'page {first + 1}'
+                            else:
+                                page_info = f'pages {first + 1}-{last + 1}'
+
+                            merged_pdf.insert_pdf(pdf, from_page=first, to_page=last)
+
+                        console.print(
+                            f'[faint]⎿ [/faint] Adding {file_path.name} ({page_info})'
+                        )
+
+                except Exception as e:
+                    console.error(f'Error processing {file_path.name}: {str(e)}')
+                    raise typer.Exit(1)
+
+            # Save the merged PDF
+            merged_pdf.save(str(output_path))
+
+        console.newline()
+        console.success(
+            f'Successfully merged {len(files_with_ranges)} files into {output_path}'
+        )
+
+    except typer.Exit:
+        # typer.Exit derives from Exception (via RuntimeError); let it through
+        # so the failure is not reported a second time as "Error during merge"
+        raise
+    except Exception as e:
+        console.error(f'Error during merge: {str(e)}')
+        raise typer.Exit(1)
diff --git a/tests/commands/test_pdf.py b/tests/commands/test_pdf.py
new file mode 100644
index 0000000..21bdd2e
--- /dev/null
+++ b/tests/commands/test_pdf.py
@@ -0,0 +1,439 @@
+"""Test suite for PDF commands."""
+
+import pytest
+from pathlib import Path
+from typer.testing import CliRunner
+import pymupdf
+
+from parxy_cli.commands.pdf import (
+    app,
+    parse_input_with_pages,
+    collect_pdf_files_with_ranges,
+)
+
+
+@pytest.fixture
+def runner():
+    """Return a fresh typer.testing.CliRunner for invoking the CLI app."""
+    return CliRunner()
+
+
+@pytest.fixture
+def sample_pdfs(tmp_path):
+    """
+    Build three small PDFs in tmp_path and return their paths.
+
+    doc1.pdf has 3 pages, doc2.pdf has 2 and doc3.pdf has 5. Every page
+    is 612 x 792 and carries the text 'Page <n> of <doc>' so tests can
+    tell pages apart.
+
+    Returns:
+        Dict with the keys 'pdf1', 'pdf2', 'pdf3' (file paths) and
+        'tmp_path' (the directory holding them).
+    """
+
+    def build(stem, page_count):
+        """Write <stem>.pdf with page_count labelled pages and return its path."""
+        target = tmp_path / f'{stem}.pdf'
+        document = pymupdf.open()
+        for number in range(1, page_count + 1):
+            page = document.new_page(width=612, height=792)
+            page.insert_text((100, 100), f'Page {number} of {stem}')
+        document.save(str(target))
+        document.close()
+        return target
+
+    # Built in the same order as the keys: doc1, then doc2, then doc3
+    first = build('doc1', 3)
+    second = build('doc2', 2)
+    third = build('doc3', 5)
+
+    return {
+        'pdf1': first,
+        'pdf2': second,
+        'pdf3': third,
+        'tmp_path': tmp_path,
+    }
+
+
+@pytest.fixture
+def pdf_folder(tmp_path):
+    """Create tmp_path/pdfs holding file1.pdf..file3.pdf (one page each)."""
+    target_dir = tmp_path / 'pdfs'
+    target_dir.mkdir()
+
+    for index in (1, 2, 3):
+        label = f'file{index}'
+        document = pymupdf.open()
+        # Single page whose text names the file it belongs to
+        sheet = document.new_page(width=612, height=792)
+        sheet.insert_text((100, 100), f'Content of {label}')
+        document.save(str(target_dir / f'{label}.pdf'))
+        document.close()
+
+    return target_dir
+
+
+# Tests for parse_input_with_pages helper function
+class TestParseInputWithPages:
+    """Tests for parse_input_with_pages (CLI pages are 1-based, results 0-based)."""
+
+    def test_no_page_range(self):
+        """Test that an input without brackets yields no page range."""
+        file_path, from_page, to_page = parse_input_with_pages('file.pdf')
+        assert file_path == 'file.pdf'
+        assert from_page is None
+        assert to_page is None
+
+    def test_single_page(self):
+        """Test that [n] selects exactly one page."""
+        file_path, from_page, to_page = parse_input_with_pages('file.pdf[3]')
+        assert file_path == 'file.pdf'
+        assert from_page == 2  # 0-based index
+        assert to_page == 2
+
+    def test_range_from_start(self):
+        """Test that [:n] starts at the first page."""
+        file_path, from_page, to_page = parse_input_with_pages('file.pdf[:5]')
+        assert file_path == 'file.pdf'
+        assert from_page == 0
+        assert to_page == 4  # 0-based index
+
+    def test_range_to_end(self):
+        """Test that [n:] leaves the end open (to_page is None)."""
+        file_path, from_page, to_page = parse_input_with_pages('file.pdf[3:]')
+        assert file_path == 'file.pdf'
+        assert from_page == 2  # 0-based index
+        assert to_page is None
+
+    def test_range_both_bounds(self):
+        """Test parsing range with both start and end specified."""
+        file_path, from_page, to_page = parse_input_with_pages('file.pdf[2:5]')
+        assert file_path == 'file.pdf'
+        assert from_page == 1  # 0-based index
+        assert to_page == 4  # 0-based index
+
+    def test_path_with_spaces(self):
+        """Test that spaces in the path do not disturb range parsing."""
+        file_path, from_page, to_page = parse_input_with_pages(
+            'path with spaces/file.pdf[1:3]'
+        )
+        assert file_path == 'path with spaces/file.pdf'
+        assert from_page == 0
+        assert to_page == 2
+
+    def test_path_with_brackets_in_name(self):
+        """Test that brackets not at the end of the input stay in the path."""
+        file_path, from_page, to_page = parse_input_with_pages('report[v2]/file.pdf')
+        assert file_path == 'report[v2]/file.pdf'
+        assert from_page is None
+        assert to_page is None
+
+
+# Tests for collect_pdf_files_with_ranges helper function
+class TestCollectPdfFilesWithRanges:
+    """Tests for collect_pdf_files_with_ranges, which returns (Path, from, to) tuples."""
+
+    def test_single_file_no_range(self, sample_pdfs):
+        """Test that a plain file path yields one entry with no page range."""
+        files = collect_pdf_files_with_ranges([str(sample_pdfs['pdf1'])])
+        assert len(files) == 1
+        assert files[0][0] == sample_pdfs['pdf1']
+        assert files[0][1] is None  # from_page
+        assert files[0][2] is None  # to_page
+
+    def test_single_file_with_range(self, sample_pdfs):
+        """Test that a [1:2] suffix is returned as the 0-based range 0..1."""
+        files = collect_pdf_files_with_ranges([f'{sample_pdfs["pdf1"]}[1:2]'])
+        assert len(files) == 1
+        assert files[0][0] == sample_pdfs['pdf1']
+        assert files[0][1] == 0  # from_page (0-based)
+        assert files[0][2] == 1  # to_page (0-based)
+
+    def test_multiple_files(self, sample_pdfs):
+        """Test that several files are returned in the order they were given."""
+        files = collect_pdf_files_with_ranges(
+            [str(sample_pdfs['pdf1']), str(sample_pdfs['pdf2'])]
+        )
+        assert len(files) == 2
+        assert files[0][0] == sample_pdfs['pdf1']
+        assert files[1][0] == sample_pdfs['pdf2']
+
+    def test_folder_input(self, pdf_folder):
+        """Test that a folder expands to the PDFs it directly contains."""
+        files = collect_pdf_files_with_ranges([str(pdf_folder)])
+        assert len(files) == 3
+        # Files should be sorted alphabetically
+        file_names = [f[0].name for f in files]
+        assert file_names == ['file1.pdf', 'file2.pdf', 'file3.pdf']
+
+    def test_mixed_files_and_folders(self, sample_pdfs, pdf_folder):
+        """Test that file and folder inputs can be combined in one call."""
+        files = collect_pdf_files_with_ranges(
+            [str(sample_pdfs['pdf1']), str(pdf_folder)]
+        )
+        assert len(files) == 4  # 1 file + 3 from folder
+
+    def test_nonexistent_file(self, tmp_path):
+        """Test that a path that does not exist contributes no entries."""
+        files = collect_pdf_files_with_ranges([str(tmp_path / 'nonexistent.pdf')])
+        assert len(files) == 0
+
+    def test_non_pdf_file(self, tmp_path):
+        """Test that an existing file without a .pdf suffix is left out."""
+        txt_file = tmp_path / 'file.txt'
+        txt_file.write_text('not a pdf')
+        files = collect_pdf_files_with_ranges([str(txt_file)])
+        assert len(files) == 0
+
+    def test_empty_folder(self, tmp_path):
+        """Test that a folder without PDFs contributes no entries."""
+        empty_folder = tmp_path / 'empty'
+        empty_folder.mkdir()
+        files = collect_pdf_files_with_ranges([str(empty_folder)])
+        assert len(files) == 0
+
+    def test_folder_with_page_range_warning(self, pdf_folder):
+        """Test that a page range attached to a folder is dropped, not applied."""
+        files = collect_pdf_files_with_ranges([f'{pdf_folder}[1:3]'])
+        # Should still collect files but ignore the page range
+        assert len(files) == 3
+        # All files should have no page ranges
+        for file_path, from_page, to_page in files:
+            assert from_page is None
+            assert to_page is None
+
+
+# Tests for the merge command
+class TestMergeCommand:
+    """Tests for the pdf:merge command, run through the Typer CLI runner."""
+
+    def test_merge_two_files_basic(self, runner, sample_pdfs):
+        """Test basic merge of two PDF files."""
+        output = sample_pdfs['tmp_path'] / 'merged.pdf'
+        result = runner.invoke(
+            app,
+            [
+                'pdf:merge',
+                str(sample_pdfs['pdf1']),
+                str(sample_pdfs['pdf2']),
+                '--output',
+                str(output),
+            ],
+        )
+
+        assert result.exit_code == 0
+        assert output.exists()
+
+        # Verify the merged PDF has correct number of pages
+        with pymupdf.open(str(output)) as merged:
+            assert len(merged) == 5  # 3 pages from pdf1 + 2 pages from pdf2
+
+    def test_merge_with_page_ranges(self, runner, sample_pdfs):
+        """Test merging with specific page ranges."""
+        output = sample_pdfs['tmp_path'] / 'merged.pdf'
+        result = runner.invoke(
+            app,
+            [
+                'pdf:merge',
+                f'{sample_pdfs["pdf1"]}[1:2]',  # First 2 pages
+                f'{sample_pdfs["pdf2"]}[1]',  # Only first page
+                '--output',
+                str(output),
+            ],
+        )
+
+        assert result.exit_code == 0
+        assert output.exists()
+
+        # Verify the merged PDF has correct number of pages
+        with pymupdf.open(str(output)) as merged:
+            assert len(merged) == 3  # 2 pages from pdf1 + 1 page from pdf2
+
+    def test_merge_folder(self, runner, pdf_folder, tmp_path):
+        """Test merging all PDFs in a folder."""
+        output = tmp_path / 'merged.pdf'
+        result = runner.invoke(
+            app, ['pdf:merge', str(pdf_folder), '--output', str(output)]
+        )
+
+        assert result.exit_code == 0
+        assert output.exists()
+
+        # Verify the merged PDF has correct number of pages (3 PDFs with 1 page each)
+        with pymupdf.open(str(output)) as merged:
+            assert len(merged) == 3
+
+    def test_merge_without_output_prompts(self, runner, sample_pdfs):
+        """Test that merge prompts for output when not specified."""
+        result = runner.invoke(
+            app,
+            ['pdf:merge', str(sample_pdfs['pdf1']), str(sample_pdfs['pdf2'])],
+            input='output.pdf\n',
+        )
+
+        # Command should prompt for output filename
+        assert 'Enter output filename' in result.stdout
+        assert result.exit_code == 0
+        # A bare filename is written next to the first input file
+        assert (sample_pdfs['tmp_path'] / 'output.pdf').exists()
+
+    def test_merge_adds_pdf_extension(self, runner, sample_pdfs):
+        """Test that .pdf extension is added if missing."""
+        output = sample_pdfs['tmp_path'] / 'merged'  # No extension
+        result = runner.invoke(
+            app,
+            [
+                'pdf:merge',
+                str(sample_pdfs['pdf1']),
+                str(sample_pdfs['pdf2']),
+                '--output',
+                str(output),
+            ],
+        )
+
+        assert result.exit_code == 0
+        # Output should have .pdf extension added
+        assert (sample_pdfs['tmp_path'] / 'merged.pdf').exists()
+
+    def test_merge_creates_output_directory(self, runner, sample_pdfs):
+        """Test that output directory is created if it doesn't exist."""
+        output = sample_pdfs['tmp_path'] / 'subdir' / 'nested' / 'merged.pdf'
+        result = runner.invoke(
+            app,
+            [
+                'pdf:merge',
+                str(sample_pdfs['pdf1']),
+                str(sample_pdfs['pdf2']),
+                '--output',
+                str(output),
+            ],
+        )
+
+        assert result.exit_code == 0
+        assert output.exists()
+        assert output.parent.exists()
+
+    def test_merge_single_file_fails(self, runner, sample_pdfs):
+        """Test that merging a single file fails with appropriate message."""
+        output = sample_pdfs['tmp_path'] / 'merged.pdf'
+        result = runner.invoke(
+            app,
+            ['pdf:merge', str(sample_pdfs['pdf1']), '--output', str(output)],
+        )
+
+        assert result.exit_code == 1
+        assert 'at least two files' in result.stdout.lower()
+        assert not output.exists()
+
+    def test_merge_no_files_fails(self, runner, tmp_path):
+        """Test that merging with no valid files fails."""
+        output = tmp_path / 'merged.pdf'
+        result = runner.invoke(
+            app,
+            ['pdf:merge', str(tmp_path / 'nonexistent.pdf'), '--output', str(output)],
+        )
+
+        assert result.exit_code == 1
+        assert 'no pdf files found' in result.stdout.lower()
+        assert not output.exists()
+
+    def test_merge_with_invalid_page_range(self, runner, sample_pdfs):
+        """Test that an out-of-range page is skipped and the merge continues."""
+        output = sample_pdfs['tmp_path'] / 'merged.pdf'
+        result = runner.invoke(
+            app,
+            [
+                'pdf:merge',
+                f'{sample_pdfs["pdf1"]}[10]',  # pdf1 has only 3 pages
+                str(sample_pdfs['pdf2']),
+                '--output',
+                str(output),
+            ],
+        )
+
+        assert result.exit_code == 0
+        # Only the 2 pages of pdf2 end up in the output
+        with pymupdf.open(str(output)) as merged:
+            assert len(merged) == 2
+
+    def test_merge_mixed_files_and_ranges(self, runner, sample_pdfs):
+        """Test merging mix of full files and page ranges."""
+        output = sample_pdfs['tmp_path'] / 'merged.pdf'
+        result = runner.invoke(
+            app,
+            [
+                'pdf:merge',
+                str(sample_pdfs['pdf1']),  # All pages (3)
+                f'{sample_pdfs["pdf3"]}[2:4]',  # Pages 2-4 (3 pages)
+                '--output',
+                str(output),
+            ],
+        )
+
+        assert result.exit_code == 0
+        assert output.exists()
+
+        with pymupdf.open(str(output)) as merged:
+            assert len(merged) == 6  # 3 from pdf1 + 3 from pdf3[2:4]
+
+    def test_merge_with_open_ended_range(self, runner, sample_pdfs):
+        """Test merging with open-ended page ranges."""
+        output = sample_pdfs['tmp_path'] / 'merged.pdf'
+        result = runner.invoke(
+            app,
+            [
+                'pdf:merge',
+                f'{sample_pdfs["pdf3"]}[:2]',  # First 2 pages
+                f'{sample_pdfs["pdf3"]}[3:]',  # From page 3 to end (3 pages)
+                '--output',
+                str(output),
+            ],
+        )
+
+        assert result.exit_code == 0
+        assert output.exists()
+
+        with pymupdf.open(str(output)) as merged:
+            assert len(merged) == 5  # 2 + 3 pages
+
+    def test_merge_preserves_order(self, runner, sample_pdfs):
+        """Test that files are merged in the specified order."""
+        output = sample_pdfs['tmp_path'] / 'merged.pdf'
+        result = runner.invoke(
+            app,
+            [
+                'pdf:merge',
+                str(sample_pdfs['pdf2']),
+                str(sample_pdfs['pdf1']),
+                '--output',
+                str(output),
+            ],
+        )
+
+        assert result.exit_code == 0
+        assert output.exists()
+
+        with pymupdf.open(str(output)) as merged:
+            assert len(merged) == 5
+            # pdf2 (2 pages) comes first, so pdf1 starts at index 2
+            assert 'doc2' in merged[0].get_text()
+            assert 'doc1' in merged[2].get_text()
+
+    def test_merge_relative_output_path(self, runner, sample_pdfs):
+        """Test that relative output path uses first file's directory."""
+        result = runner.invoke(
+            app,
+            [
+                'pdf:merge',
+                str(sample_pdfs['pdf1']),
+                str(sample_pdfs['pdf2']),
+                '--output',
+                'merged.pdf',  # Relative path
+            ],
+        )
+
+        assert result.exit_code == 0
+        # Output should be in same directory as first input file
+        expected_output = sample_pdfs['pdf1'].parent / 'merged.pdf'
+        assert expected_output.exists()

From 69f466add7e2051053c04e9de43ab8a6e2104292 Mon Sep 17 00:00:00 2001
From: Alessio <alessio.vertemati@gmail.com>
Date: Tue, 9 Dec 2025 20:55:02 +0100
Subject: [PATCH 2/3] Add pdf:split command

---
 src/parxy_cli/commands/pdf.py | 116 +++++++++++++++++++++++++++++
 tests/commands/test_pdf.py    | 134 ++++++++++++++++++++++++++++++++++
 2 files changed, 250 insertions(+)

diff --git a/src/parxy_cli/commands/pdf.py b/src/parxy_cli/commands/pdf.py
index 49c4a3b..700bfd6 100644
--- a/src/parxy_cli/commands/pdf.py
+++ b/src/parxy_cli/commands/pdf.py
@@ -279,3 +279,119 @@ def merge(
     except Exception as e:
         console.error(f'Error during merge: {str(e)}')
         raise typer.Exit(1)
+
+
+@app.command(name='pdf:split', help='Split a PDF file into individual pages')
+def split(
+    input_file: Annotated[
+        str,
+        typer.Argument(
+            help='PDF file to split',
+        ),
+    ],
+    output_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            '--output',
+            '-o',
+            help='Output directory for split files. If not specified, creates a folder next to the input file.',
+        ),
+    ] = None,
+    prefix: Annotated[
+        Optional[str],
+        typer.Option(
+            '--prefix',
+            '-p',
+            help='Prefix for output filenames. If not specified, uses the input filename.',
+        ),
+    ] = None,
+):
+    """
+    Split a PDF file into individual pages.
+
+    Each page becomes a separate PDF file in the output directory, which is
+    created if it does not exist.
+
+    Output files are named: {prefix}_page_{number}.pdf, where number is the
+    1-based page number.
+
+    Examples:
+
+        # Split into individual pages (default behavior)
+        parxy pdf:split document.pdf
+
+        # Split with custom output directory
+        parxy pdf:split document.pdf -o /path/to/output
+
+        # Split with custom prefix
+        parxy pdf:split document.pdf --prefix chapter
+
+        # Split with custom output and prefix
+        parxy pdf:split report.pdf -o ./pages -p page
+    """
+    console.action('Split PDF file', space_after=False)
+
+    # Validate input file
+    input_path = Path(input_file)
+    if not input_path.is_file():
+        console.error(f'Input file not found: {input_file}', panel=True)
+        raise typer.Exit(1)
+
+    if input_path.suffix.lower() != '.pdf':
+        console.error(f'Input file must be a PDF: {input_file}', panel=True)
+        raise typer.Exit(1)
+
+    # Determine output directory
+    if output_dir is None:
+        # Create a folder next to the input file
+        output_path = input_path.parent / f'{input_path.stem}_split'
+    else:
+        output_path = Path(output_dir)
+
+    # Create output directory
+    output_path.mkdir(parents=True, exist_ok=True)
+
+    # Determine filename prefix
+    if prefix is None:
+        prefix = input_path.stem
+
+    # Open and process the PDF
+    try:
+        # Context managers close the source and each page file on every path
+        with pymupdf.open(input_path) as pdf:
+            total_pages = len(pdf)
+
+            if total_pages == 0:
+                console.error('PDF file is empty (no pages)', panel=True)
+                raise typer.Exit(1)
+
+            plural = 's' if total_pages > 1 else ''
+            console.info(f'Processing PDF with {total_pages} page{plural}')
+            console.info(f'Splitting into {total_pages} file{plural}')
+
+            with console.shimmer('Splitting PDF...'):
+                output_files = []
+
+                # One single-page document per source page, numbered from 1
+                for page_num in range(total_pages):
+                    output_file = output_path / f'{prefix}_page_{page_num + 1}.pdf'
+                    with pymupdf.open() as output_pdf:
+                        output_pdf.insert_pdf(pdf, from_page=page_num, to_page=page_num)
+                        output_pdf.save(str(output_file))
+                    output_files.append(output_file)
+                    console.print(
+                        f'[faint]⎿ [/faint] Created {output_file.name} (page {page_num + 1})'
+                    )
+
+        console.newline()
+        console.success(
+            f'Successfully split PDF into {len(output_files)} file{"s" if len(output_files) > 1 else ""} in {output_path}'
+        )
+
+    except typer.Exit:
+        # typer.Exit derives from Exception (via RuntimeError); re-raise it so
+        # the empty-PDF exit is not reported again as "Error during split"
+        raise
+    except Exception as e:
+        console.error(f'Error during split: {str(e)}')
+        raise typer.Exit(1)
diff --git a/tests/commands/test_pdf.py b/tests/commands/test_pdf.py
index 21bdd2e..72ee0fc 100644
--- a/tests/commands/test_pdf.py
+++ b/tests/commands/test_pdf.py
@@ -437,3 +437,137 @@ def test_merge_relative_output_path(self, runner, sample_pdfs):
         # Output should be in same directory as first input file
         expected_output = sample_pdfs['pdf1'].parent / 'merged.pdf'
         assert expected_output.exists()
+
+
+# Tests for the split command
+class TestSplitCommand:
+    """Tests for the pdf:split command."""
+
+    def test_split_into_individual_pages(self, runner, sample_pdfs):
+        """Test splitting a PDF into individual pages."""
+        output_dir = sample_pdfs['tmp_path'] / 'split_output'
+        result = runner.invoke(
+            app,
+            [
+                'pdf:split',
+                str(sample_pdfs['pdf1']),
+                '--output',
+                str(output_dir),
+            ],
+        )
+
+        assert result.exit_code == 0
+        assert output_dir.exists()
+
+        # Should create 3 files (pdf1 has 3 pages)
+        output_files = sorted(output_dir.glob('*.pdf'))
+        assert len(output_files) == 3
+
+        # Check filenames
+        assert output_files[0].name == 'doc1_page_1.pdf'
+        assert output_files[1].name == 'doc1_page_2.pdf'
+        assert output_files[2].name == 'doc1_page_3.pdf'
+
+        # Verify each file has exactly 1 page
+        for output_file in output_files:
+            pdf = pymupdf.open(str(output_file))
+            assert len(pdf) == 1
+            pdf.close()
+
+    def test_split_with_custom_prefix(self, runner, sample_pdfs):
+        """Test splitting with a custom filename prefix."""
+        output_dir = sample_pdfs['tmp_path'] / 'split_prefix'
+        result = runner.invoke(
+            app,
+            [
+                'pdf:split',
+                str(sample_pdfs['pdf2']),  # 2 pages
+                '--output',
+                str(output_dir),
+                '--prefix',
+                'chapter',
+            ],
+        )
+
+        assert result.exit_code == 0
+        assert output_dir.exists()
+
+        output_files = sorted(output_dir.glob('*.pdf'))
+        assert len(output_files) == 2
+        assert output_files[0].name == 'chapter_page_1.pdf'
+        assert output_files[1].name == 'chapter_page_2.pdf'
+
+    def test_split_default_output_directory(self, runner, sample_pdfs):
+        """Test that default output directory is created next to input file."""
+        result = runner.invoke(
+            app,
+            [
+                'pdf:split',
+                str(sample_pdfs['pdf2']),
+            ],
+        )
+
+        assert result.exit_code == 0
+
+        # Default output should be {filename}_split next to the input
+        expected_output_dir = sample_pdfs['pdf2'].parent / 'doc2_split'
+        assert expected_output_dir.exists()
+
+        output_files = list(expected_output_dir.glob('*.pdf'))
+        assert len(output_files) == 2
+
+    def test_split_nonexistent_file(self, runner, tmp_path):
+        """Test splitting a nonexistent file."""
+        result = runner.invoke(
+            app,
+            [
+                'pdf:split',
+                str(tmp_path / 'nonexistent.pdf'),
+            ],
+        )
+
+        assert result.exit_code == 1
+        assert 'not found' in result.stdout.lower()
+
+    def test_split_non_pdf_file(self, runner, tmp_path):
+        """Test splitting a non-PDF file."""
+        txt_file = tmp_path / 'file.txt'
+        txt_file.write_text('not a pdf')
+
+        result = runner.invoke(
+            app,
+            [
+                'pdf:split',
+                str(txt_file),
+            ],
+        )
+
+        assert result.exit_code == 1
+        assert 'must be a pdf' in result.stdout.lower()
+
+    def test_split_single_page_pdf(self, runner, tmp_path):
+        """Test splitting a single-page PDF."""
+        # Create a single-page PDF
+        pdf_path = tmp_path / 'single.pdf'
+        pdf = pymupdf.open()
+        page = pdf.new_page(width=612, height=792)
+        page.insert_text((100, 100), 'Single page')
+        pdf.save(str(pdf_path))
+        pdf.close()
+
+        output_dir = tmp_path / 'split_single'
+        result = runner.invoke(
+            app,
+            [
+                'pdf:split',
+                str(pdf_path),
+                '--output',
+                str(output_dir),
+            ],
+        )
+
+        assert result.exit_code == 0
+
+        output_files = list(output_dir.glob('*.pdf'))
+        assert len(output_files) == 1
+        assert output_files[0].name == 'single_page_1.pdf'

From 5d30f2dc8852cab0f34dfa86d3ca204195a2b504 Mon Sep 17 00:00:00 2001
From: Alessio <alessio.vertemati@gmail.com>
Date: Tue, 9 Dec 2025 20:55:33 +0100
Subject: [PATCH 3/3] Document pdf:split and pdf:merge commands

---
 README.md                      |   8 ++
 docs/howto/pdf_manipulation.md | 255 +++++++++++++++++++++++++++++++++
 docs/tutorials/using_cli.md    |  59 ++++++++
 3 files changed, 322 insertions(+)
 create mode 100644 docs/howto/pdf_manipulation.md

diff --git a/README.md b/README.md
index 11bb3bb..46b0909 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,8 @@ Once installed, you can use the `parxy` command to:
 - `parxy parse`: Extract text content from documents with customizable granularity levels and output formats. Process individual files or entire folders, use multiple drivers, and control output with progress bars.
 - `parxy preview`: Interactive document viewer showing metadata, table of contents, and content preview in a scrollable interface
 - `parxy markdown`: Convert documents into Markdown format, with optional combining of multiple documents
+- `parxy pdf:merge`: Merge multiple PDF files into one, with support for selecting specific page ranges
+- `parxy pdf:split`: Split a PDF file into individual pages
 - `parxy drivers`: List available document processing drivers
 - `parxy env`: Create a configuration file with default settings
 - `parxy docker`: Generate a Docker Compose configuration for self-hosted services
@@ -89,6 +91,12 @@ parxy preview document.pdf
 # Convert multiple PDFs to markdown and combine them
 parxy markdown --combine -o output/ doc1.pdf doc2.pdf
 
+# Merge multiple PDFs with page ranges
+parxy pdf:merge cover.pdf doc1.pdf[1:10] doc2.pdf -o merged.pdf
+
+# Split a PDF into individual pages
+parxy pdf:split document.pdf -o ./pages
+
 # List available drivers
 parxy drivers
 ```
diff --git a/docs/howto/pdf_manipulation.md b/docs/howto/pdf_manipulation.md
new file mode 100644
index 0000000..6f7c2f7
--- /dev/null
+++ b/docs/howto/pdf_manipulation.md
@@ -0,0 +1,255 @@
+# How to Manipulate PDFs with Parxy
+
+Parxy provides powerful **PDF manipulation commands** that allow you to merge multiple PDF files into one or split a single PDF into multiple files — all from the command line.
+
+These commands are useful for:
+- Combining multiple PDF documents into a single file
+- Extracting specific page ranges from PDFs
+- Splitting a PDF into individual single-page files
+- Reorganizing PDF pages
+
+## Merging PDFs
+
+The `pdf:merge` command combines multiple PDF files into a single output file, with support for selecting specific page ranges.
+
+### Basic Merging
+
+Merge two or more PDF files:
+
+```bash
+parxy pdf:merge file1.pdf file2.pdf -o merged.pdf
+```
+
+If you don't specify an output file, you'll be prompted to enter one:
+
+```bash
+parxy pdf:merge file1.pdf file2.pdf
+# Prompts: Enter output filename or path: merged.pdf
+```
+
+### Merging Entire Folders
+
+You can merge all PDFs in a folder (non-recursively):
+
+```bash
+parxy pdf:merge /path/to/folder -o combined.pdf
+```
+
+Files from folders are included in alphabetical order.
+
+### Combining Files and Folders
+
+Mix individual files and folders:
+
+```bash
+parxy pdf:merge cover.pdf /path/to/chapters appendix.pdf -o book.pdf
+```
+
+### Selecting Specific Pages
+
+Use square brackets to specify page ranges (1-based indexing):
+
+**Single page:**
+```bash
+parxy pdf:merge document.pdf[1] -o first_page.pdf
+```
+
+**Page range:**
+```bash
+parxy pdf:merge document.pdf[1:3] -o first_three_pages.pdf
+```
+
+**From start to page N:**
+```bash
+parxy pdf:merge document.pdf[:5] -o first_five_pages.pdf
+```
+
+**From page N to end:**
+```bash
+parxy pdf:merge document.pdf[10:] -o from_page_10.pdf
+```
+
+### Advanced Merging Examples
+
+**Combine specific pages from multiple documents:**
+```bash
+parxy pdf:merge doc1.pdf[1] doc2.pdf[2:4] doc3.pdf[:2] -o selected_pages.pdf
+```
+
+**Mix full files with page ranges:**
+```bash
+parxy pdf:merge cover.pdf report.pdf[1:10] summary.pdf appendix.pdf[5:] -o final_report.pdf
+```
+
+**Merge chapter files:**
+```bash
+parxy pdf:merge intro.pdf chapter1.pdf chapter2.pdf chapter3.pdf conclusion.pdf -o complete_book.pdf
+```
+
+### Output Path Handling
+
+- If you provide a full path, the file is created there
+- If you provide just a filename, it's created in the same directory as the first input file
+- The `.pdf` extension is added automatically if not provided
+
+```bash
+# Creates merged.pdf in the same directory as file1.pdf
+parxy pdf:merge file1.pdf file2.pdf -o merged
+
+# Creates in specified directory
+parxy pdf:merge file1.pdf file2.pdf -o /output/dir/merged.pdf
+```
+
+## Splitting PDFs
+
+The `pdf:split` command divides a single PDF into individual pages, with each page becoming a separate PDF file.
+
+### Basic Splitting
+
+Split a PDF into individual pages:
+
+```bash
+parxy pdf:split document.pdf
+```
+
+This creates a folder named `document_split/` next to the input file, containing:
+- `document_page_1.pdf`
+- `document_page_2.pdf`
+- `document_page_3.pdf`
+- etc.
+
+### Custom Output Directory
+
+Specify where to save the split files:
+
+```bash
+parxy pdf:split document.pdf --output /path/to/output
+```
+
+### Custom Filename Prefix
+
+Change the prefix of output filenames:
+
+```bash
+parxy pdf:split book.pdf --prefix chapter
+```
+
+Creates files named:
+- `chapter_page_1.pdf`
+- `chapter_page_2.pdf`
+- etc.
+
+### Complete Examples
+
+**Split with custom output directory:**
+```bash
+parxy pdf:split annual_report.pdf -o ./pages
+```
+
+**Split with custom prefix:**
+```bash
+parxy pdf:split presentation.pdf --prefix slide
+```
+
+Creates:
+- `slide_page_1.pdf`
+- `slide_page_2.pdf`
+- etc.
+
+**Split with both custom output and prefix:**
+```bash
+parxy pdf:split document.pdf -o ./individual_pages -p page
+```
+
+## Combining Merge and Split
+
+You can combine both commands by running them one after the other:
+
+**Example: Extract specific pages and split them:**
+```bash
+# First, extract pages 10-20
+parxy pdf:merge document.pdf[10:20] -o extracted.pdf
+
+# Then split into individual pages
+parxy pdf:split extracted.pdf -o ./individual_pages
+```
+
+**Example: Merge and organize:**
+```bash
+# Merge selected pages from multiple documents
+parxy pdf:merge doc1.pdf[1:5] doc2.pdf[3:8] -o combined.pdf
+
+# Split the combined result into individual pages
+parxy pdf:split combined.pdf -o ./pages -p combined_page
+```
+
+## Tips and Best Practices
+
+### Page Numbering
+- All page ranges use **1-based indexing** (first page is page 1, not 0)
+- Ranges are **inclusive** (e.g., `[1:3]` includes pages 1, 2, and 3)
+
+### File Organization
+- Use folders to keep merged/split files organized
+- Use descriptive prefixes to make file purposes clear
+- Split creates a dedicated folder by default to avoid clutter
+
+### Performance
+- Both commands are optimized for speed
+- Large PDFs are processed efficiently
+- Progress information is displayed during processing
+
+### Error Handling
+- Invalid page ranges are reported with warnings
+- Missing files are detected before processing starts
+- The commands validate input before making changes
+
+## Command Reference
+
+### pdf:merge
+
+```bash
+parxy pdf:merge [FILES...] --output OUTPUT
+```
+
+**Arguments:**
+- `FILES`: One or more PDF files or folders. Supports page ranges: `file.pdf[1:3]`
+
+**Options:**
+- `--output, -o`: Output file path (prompted if not provided)
+
+**Examples:**
+```bash
+parxy pdf:merge file1.pdf file2.pdf -o merged.pdf
+parxy pdf:merge folder1/ file.pdf folder2/ -o combined.pdf
+parxy pdf:merge doc.pdf[1:10] doc.pdf[20:30] -o selections.pdf
+```
+
+### pdf:split
+
+```bash
+parxy pdf:split INPUT_FILE [OPTIONS]
+```
+
+**Arguments:**
+- `INPUT_FILE`: PDF file to split into individual pages
+
+**Options:**
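+- `--output, -o`: Output directory (default: `{filename}_split/` next to the input file; created if missing)
+- `--prefix, -p`: Output filename prefix (default: input filename without extension); files are named `{prefix}_page_{N}.pdf`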
+
+**Examples:**
+```bash
+parxy pdf:split document.pdf
+parxy pdf:split document.pdf -o ./pages
+parxy pdf:split document.pdf -o ./pages -p page
+```
+
+## Getting Help
+
+For detailed command usage, use the `--help` flag:
+
+```bash
+parxy pdf:merge --help
+parxy pdf:split --help
+```
diff --git a/docs/tutorials/using_cli.md b/docs/tutorials/using_cli.md
index d5541f8..42adb33 100644
--- a/docs/tutorials/using_cli.md
+++ b/docs/tutorials/using_cli.md
@@ -13,6 +13,8 @@ The Parxy CLI lets you:
 | `parxy parse`    | Extract text content from documents with customizable detail levels and output formats. Process files or folders with multiple drivers. |
 | `parxy preview`  | Interactive document viewer with metadata, table of contents, and scrollable content preview                |
 | `parxy markdown` | Convert parsed documents into Markdown format (optionally combine multiple files)                           |
+| `parxy pdf:merge`| Merge multiple PDF files into one, with support for page ranges                                            |
+| `parxy pdf:split`| Split a PDF file into individual pages                                                                      |
 | `parxy drivers`  | List available document processing drivers                                                                  |
 | `parxy env`      | Generate a default `.env` configuration file                                                                |
 | `parxy docker`   | Create a Docker Compose configuration for running Parxy-related services                                    |
@@ -207,6 +209,61 @@ parxy markdown --combine -o output/ doc1.pdf doc2.pdf doc3.pdf
 This will generate a file named `combined_output.md` in the output directory.
 
 
+## Manipulating PDFs
+
+Parxy provides two powerful commands for PDF manipulation: merging multiple PDFs into one and splitting a single PDF into multiple files.
+
+### Merging PDFs
+
+The `pdf:merge` command combines multiple PDF files into a single output file. You can merge entire files, specific page ranges, or folders of PDFs.
+
+**Basic merge:**
+```bash
+parxy pdf:merge file1.pdf file2.pdf -o merged.pdf
+```
+
+**Merge with page ranges:**
+```bash
+parxy pdf:merge doc1.pdf[1:5] doc2.pdf[3:7] -o combined.pdf
+```
+
+Page range syntax (1-based indexing):
+- `file.pdf[1]` - Single page (page 1)
+- `file.pdf[1:5]` - Pages 1 through 5
+- `file.pdf[:3]` - First 3 pages
+- `file.pdf[5:]` - From page 5 to the end
+
+**Merge entire folders:**
+```bash
+parxy pdf:merge /path/to/pdfs -o combined.pdf
+```
+
+**Mix files, folders, and page ranges:**
+```bash
+parxy pdf:merge cover.pdf /chapters doc.pdf[10:20] appendix.pdf -o book.pdf
+```
+
+### Splitting PDFs
+
+The `pdf:split` command divides a PDF file into individual pages, with each page becoming a separate PDF file.
+
+**Split into individual pages:**
+```bash
+parxy pdf:split document.pdf
+```
+
+This creates a `document_split/` folder containing `document_page_1.pdf`, `document_page_2.pdf`, etc.
+
+**Specify output directory and prefix:**
+```bash
+parxy pdf:split report.pdf -o ./pages -p page
+```
+
+Creates `page_page_1.pdf`, `page_page_2.pdf`, etc. in the `./pages` directory (the `_page_N` suffix is always appended to the prefix).
+
+For more detailed examples and use cases, see the [PDF Manipulation How-to Guide](../howto/pdf_manipulation.md).
+
+
 ## Managing Drivers
 
 To view the list of supported document parsing drivers:
@@ -271,6 +328,8 @@ With the CLI, you can use Parxy as a **standalone document parsing tool** — id
 | `parxy parse`    | Extract text from documents with multiple formats & drivers  |
 | `parxy preview`  | Interactive document viewer with metadata and TOC            |
 | `parxy markdown` | Generate Markdown output                                     |
+| `parxy pdf:merge`| Merge multiple PDF files with page range support             |
+| `parxy pdf:split`| Split PDF files into individual pages                        |
 | `parxy drivers`  | List supported drivers                                       |
 | `parxy env`      | Create default configuration file                            |
 | `parxy docker`   | Generate Docker Compose setup                                |