Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 30 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ Options:
--storage PATH Storage directory (default: claude_sync_data)
--headless Run browser in headless mode
--quiet Suppress progress output
--strict Stop on first error and produce detailed failure report
```

## Project Structure
Expand Down Expand Up @@ -124,14 +125,23 @@ claude_sync/
## How It Works

1. **Browser Automation**: Uses Playwright to control Chrome via DevTools Protocol
2. **Project Discovery**: Navigates to Claude.ai projects page and clicks "View All" to load all projects
3. **Content Extraction**: For each project:
2. **Cookie Handling**: Automatically detects and accepts GDPR/cookie consent notices
3. **Project Discovery**: Navigates to Claude.ai projects page and clicks "View All" to load all projects
4. **Content Extraction**: For each project:
- Navigates to the project page
- Extracts list of knowledge files
- Clicks on each file to open the modal
- Uses context manager to safely open/close file modals
- Extracts the file content from the modal
- Saves to local storage
4. **Progress Tracking**: Provides real-time updates on sync progress
5. **Modal Management**:
- Context manager ensures modals are always closed
- Force cleanup runs between projects
- Periodic cleanup every 5 files within a project
- Aggressive modal removal if stuck states detected
6. **Progress Tracking**: Provides real-time updates on sync progress
7. **Error Handling**:
- Normal mode: Continues on errors and reports them at the end
- Strict mode: Stops on first error and generates detailed failure report

## Architecture

Expand All @@ -151,6 +161,22 @@ Claude.ai → Browser Automation → HTML Extraction → Local Storage
└── Progress Updates ────┘
```

## Strict Mode

When running with `--strict`, the sync will stop immediately on the first error and generate a detailed failure report:

```bash
python sync_cli.py sync --strict
```

The failure report includes:
- Exact file and project that failed
- Error details and timestamp
- Progress at time of failure
- All previous errors encountered

The report is saved to `.metadata/strict_mode_failure.json` inside the storage directory (by default: `claude_sync_data/.metadata/strict_mode_failure.json`).

## Development

### Running Tests
Expand Down
4 changes: 4 additions & 0 deletions claude_sync/browser/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ class BrowserConfig(BaseModel):
default=720,
description="Browser viewport height"
)
strict_mode: bool = Field(
default=False,
description="Stop on first error and produce detailed failure report"
)

def get_chrome_args(self) -> List[str]:
"""Get Chrome launch arguments for memory optimization and stability."""
Expand Down
529 changes: 394 additions & 135 deletions claude_sync/browser/connection.py

Large diffs are not rendered by default.

72 changes: 68 additions & 4 deletions claude_sync/sync/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path
from typing import List, Optional, Dict, Any, Callable
from datetime import datetime
import json

from claude_sync.browser import BrowserConfig, ChromeManager, ChromeConnection
from claude_sync.models import Project, KnowledgeFile
Expand All @@ -12,6 +13,38 @@
logger = logging.getLogger(__name__)


class StrictModeError(Exception):
    """Raised when sync fails in strict mode.

    The exception keeps the error detail and progress tracker it was given
    and builds a failure report once, at construction time.

    Attributes:
        error_detail: Dict describing the error that triggered the failure.
        progress: The progress tracker passed in by the caller.
        report: Failure report dict produced by ``_generate_report``.
    """

    def __init__(self, message: str, error_detail: Dict[str, Any], progress: 'SyncProgress'):
        """Store the failure context and build the report.

        Args:
            message: Human-readable summary, used as the exception message.
            error_detail: Dict describing the failing operation.
            progress: Object exposing the progress counters, the current
                project/file, and an ``errors`` list.
        """
        super().__init__(message)
        self.error_detail = error_detail
        self.progress = progress
        self.report = self._generate_report()

    def _generate_report(self) -> Dict[str, Any]:
        """Generate detailed failure report.

        Returns:
            Dict with the triggering error, the progress counters at the
            time of the call, a copy of all errors recorded so far, and an
            ISO-format timestamp.
        """
        progress = self.progress
        return {
            "error": self.error_detail,
            "progress_at_failure": {
                "completed_projects": progress.completed_projects,
                "total_projects": progress.total_projects,
                "completed_files": progress.completed_files,
                "total_files": progress.total_files,
                "current_project": progress.current_project,
                "current_file": progress.current_file,
            },
            # Copy the list: the report describes the state at failure time
            # and must not change if more errors are appended afterwards.
            "all_errors": list(progress.errors),
            "timestamp": datetime.now().isoformat(),
        }

    def save_report(self, path: Path) -> None:
        """Save failure report to file.

        Missing parent directories are created. The report is written as
        indented JSON.

        Args:
            path: Destination file for the report.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding so the output does not depend on the locale.
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(self.report, f, indent=2)


class SyncProgress:
"""Tracks sync progress."""

Expand Down Expand Up @@ -128,6 +161,19 @@ async def sync_all(self, filter_projects: Optional[List[str]] = None) -> Dict[st
logger.info(f"Sync completed in {duration:.1f}s")
return summary

except StrictModeError as e:
# Save failure report
report_path = self.storage.base_path / ".metadata" / "strict_mode_failure.json"
e.save_report(report_path)
logger.error(f"Strict mode failure. Report saved to: {report_path}")

return {
"success": False,
"error": str(e),
"strict_mode_report": e.report,
"report_path": str(report_path),
"progress": self.progress.to_dict()
}
except Exception as e:
logger.error(f"Sync failed: {e}")
return {
Expand Down Expand Up @@ -181,13 +227,21 @@ async def _sync_project(
self._update_progress()

# Download each file
for file in files:
for i, file in enumerate(files):
await self._sync_knowledge_file(connection, project, file)

# Every 5 files, do a force close to ensure clean state
if (i + 1) % 5 == 0 and i < len(files) - 1:
logger.debug(f"Periodic modal cleanup after {i + 1} files")
await connection.force_close_all_modals()

# Mark project complete
self.progress.completed_projects += 1
self._update_progress()

# Force close any lingering modals before moving to next project
await connection.force_close_all_modals()

except Exception as e:
logger.error(f"Failed to sync project {project.name}: {e}")
self.progress.errors.append({
Expand Down Expand Up @@ -242,12 +296,22 @@ async def _sync_knowledge_file(

except Exception as e:
logger.error(f"Failed to sync file {file.name}: {e}")
self.progress.errors.append({
error_detail = {
"type": "file_sync",
"project": project.name,
"file": file.name,
"error": str(e)
})
"error": str(e),
"timestamp": datetime.now().isoformat()
}
self.progress.errors.append(error_detail)

# In strict mode, stop immediately and generate report
if self.browser_config.strict_mode:
raise StrictModeError(
f"Strict mode: Failed to sync file {file.name} in project {project.name}",
error_detail,
self.progress
)

async def _alternative_download(
self,
Expand Down
48 changes: 38 additions & 10 deletions sync_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ async def sync_all(args):
storage_path = Path(args.storage or "claude_sync_data")
logger.info(f"Storage path: {storage_path}")

config = BrowserConfig(headless=args.headless)
config = BrowserConfig(headless=args.headless, strict_mode=args.strict)
orchestrator = SyncOrchestrator(
storage_path,
browser_config=config,
Expand All @@ -58,6 +58,17 @@ async def sync_all(args):
print(f" - {error['type']}: {error.get('file', error.get('project'))}")
else:
print(f"\n✗ Sync failed: {result.get('error', 'Unknown error')}")

# Show strict mode report if available
if "strict_mode_report" in result:
print(f"\n📋 Strict Mode Failure Report:")
report = result["strict_mode_report"]
print(f" Failed file: {report['error']['file']}")
print(f" Project: {report['error']['project']}")
print(f" Error: {report['error']['error']}")
print(f" Progress: {report['progress_at_failure']['completed_files']}/{report['progress_at_failure']['total_files']} files")
print(f" Report saved to: {result['report_path']}")

sys.exit(1)


Expand All @@ -66,7 +77,7 @@ async def sync_project(args):
storage_path = Path(args.storage or "claude_sync_data")
logger.info(f"Storage path: {storage_path}")

config = BrowserConfig(headless=args.headless)
config = BrowserConfig(headless=args.headless, strict_mode=args.strict)
orchestrator = SyncOrchestrator(
storage_path,
browser_config=config,
Expand Down Expand Up @@ -117,36 +128,53 @@ async def list_projects(args):
def main():
"""Main CLI entry point."""
parser = argparse.ArgumentParser(description="Sync Claude.ai data locally")
parser.add_argument(

subparsers = parser.add_subparsers(dest="command", help="Commands")

# Common arguments for all commands
common_parser = argparse.ArgumentParser(add_help=False)
common_parser.add_argument(
"--storage",
help="Storage directory (default: claude_sync_data)",
default="claude_sync_data"
)
parser.add_argument(
common_parser.add_argument(
"--headless",
action="store_true",
help="Run browser in headless mode"
)
parser.add_argument(
common_parser.add_argument(
"--quiet",
action="store_true",
help="Suppress progress output"
)

subparsers = parser.add_subparsers(dest="command", help="Commands")
common_parser.add_argument(
"--strict",
action="store_true",
help="Stop on first error and produce detailed failure report"
)

# Sync all command
sync_all_parser = subparsers.add_parser("sync", help="Sync all projects")
sync_all_parser = subparsers.add_parser(
"sync",
help="Sync all projects",
parents=[common_parser]
)

# Sync project command
sync_project_parser = subparsers.add_parser(
"sync-project",
help="Sync a specific project"
help="Sync a specific project",
parents=[common_parser]
)
sync_project_parser.add_argument("project", help="Project name to sync")

# List command
list_parser = subparsers.add_parser("list", help="List synced projects")
list_parser = subparsers.add_parser(
"list",
help="List synced projects",
parents=[common_parser]
)

args = parser.parse_args()

Expand Down
Loading