seanbrar · seanbrar · Feb 17, 2026 · Feb 17, 2026 · Feb 17, 2026 · Feb 17, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.1] - 2026-02-16
+
+### Changed
+- arXiv categories are now batched into a single OR query (`cat:cs.AI OR cat:cs.CL`), reducing N parallel API calls to 1
+- `page_size` is now set to `min(max_results, 100)` instead of always requesting 100 results
+- `ThreadPoolExecutor` removed from `fetch_recent_papers()` since only one API call is made
+- `--force-refresh` now fetches today's papers (1-day window) instead of a full 7-day backfill; the 7-day bootstrap is reserved for first runs only
+- README Quick Start no longer recommends `--force-refresh` for the initial run, since `paperweight run` already backfills automatically on first use
+
+### Added
+- RSS feed fetcher (`fetch_rss_papers`) for daily lookups — no rate limits, sub-second metadata fetch
+- RSS-first routing in `fetch_recent_papers`: daily runs try RSS before falling back to the arXiv API
+- Exponential backoff (via `tenacity`) on `arxiv.HTTPError` with waits of 5 → 15 → 45 → 90 s
+- `ArxivRateLimitError` exception with user-friendly message for HTTP 429 responses
+
 ## [0.3.0] - 2026-02-15
 
 ### Added
@@ -87,7 +102,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Email notification system
 - YAML-based configuration
 
-[Unreleased]: https://github.com/seanbrar/paperweight/compare/v0.3.0...HEAD
+[Unreleased]: https://github.com/seanbrar/paperweight/compare/v0.3.1...HEAD
+[0.3.1]: https://github.com/seanbrar/paperweight/compare/v0.3.0...v0.3.1
 [0.3.0]: https://github.com/seanbrar/paperweight/compare/v0.2.0...v0.3.0
 [0.2.0]: https://github.com/seanbrar/paperweight/compare/v0.1.2...v0.2.0
 [0.1.2]: https://github.com/seanbrar/paperweight/compare/v0.1.1...v0.1.2

diff --git a/README.md b/README.md
@@ -47,11 +47,15 @@ source .venv/bin/activate
 ## Quick start (works without API keys)
 
 ```bash
-paperweight init           # create config.yaml with safe defaults
-paperweight doctor         # check your setup for issues
-paperweight run --force-refresh  # fetch papers and produce a digest
+paperweight init    # create config.yaml with safe defaults
+paperweight doctor  # check your setup for issues
+paperweight run     # fetch papers and produce a digest
 ```
 
+The first run automatically backfills a week of papers. After that, the same
+`paperweight run` fetches only what's new. If you've already run today, use
+`--force-refresh` to re-fetch today's papers.
+
 Notes:
 
 - Default analyzer mode is `abstract` (no API key required).

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "academic-paperweight"
-version = "0.3.0"
+version = "0.3.1"
 description = "Automated retrieval, filtering, and LLM-powered summarization of arXiv papers based on your research interests."
 readme = "README.md"
 requires-python = ">=3.11, <3.14"

diff --git a/src/mocks/local_client.py b/src/mocks/local_client.py
@@ -20,8 +20,7 @@
 
 
 def mock_fetch_paper_content(
-    paper_id: str,
-    files_dir: Path = DEFAULT_FILES_DIR
+    paper_id: str, files_dir: Path = DEFAULT_FILES_DIR
 ) -> Tuple[Optional[bytes], Optional[str]]:
     """Mock replacement for paperweight.scraper.fetch_paper_content.
 
@@ -37,7 +36,7 @@ def mock_fetch_paper_content(
         or (None, None) if no file found.
     """
     # Normalize paper_id - strip version if present for base lookup
-    base_id = paper_id.split('v')[0] if 'v' in paper_id else paper_id
+    base_id = paper_id.split("v")[0] if "v" in paper_id else paper_id
 
     # Try different ID patterns (with/without version)
     id_patterns = [paper_id]
@@ -48,7 +47,7 @@ def mock_fetch_paper_content(
     if paper_id == base_id:
         # Look for any versioned file
         for f in files_dir.glob(f"{base_id}v*.tar.gz"):
-            id_patterns.insert(0, f.stem.replace('.tar', ''))
+            id_patterns.insert(0, f.stem.replace(".tar", ""))
             break
         for f in files_dir.glob(f"{base_id}v*.pdf"):
             if f.stem not in id_patterns:
@@ -70,17 +69,17 @@ def mock_fetch_paper_content(
 
 
 def mock_fetch_arxiv_papers(
-    category: str,
+    categories: List[str],
     start_date: Any,
     max_results: Optional[int] = None,
-    db_path: Path = DEFAULT_DB_PATH
+    db_path: Path = DEFAULT_DB_PATH,
 ) -> List[Dict[str, Any]]:
     """Mock replacement for paperweight.scraper.fetch_arxiv_papers.
 
     Reads paper metadata from local SQLite database instead of arXiv API.
 
     Args:
-        category: The arXiv category to filter by (e.g., "cs.AI")
+        categories: arXiv categories to filter by (e.g., ``['cs.AI', 'cs.CL']``)
         start_date: Not used in mock (we return all matching papers)
         max_results: Maximum number of results to return
         db_path: Path to the SQLite database
@@ -95,8 +94,10 @@ def mock_fetch_arxiv_papers(
     conn.row_factory = sqlite3.Row
     cursor = conn.cursor()
 
-    sql = "SELECT * FROM papers WHERE categories LIKE ?"
-    params: List[Any] = [f"%{category}%"]
+    # Build category filter with OR logic
+    cat_conditions = " OR ".join(["categories LIKE ?" for _ in categories])
+    sql = f"SELECT * FROM papers WHERE ({cat_conditions})"
+    params: List[Any] = [f"%{cat}%" for cat in categories]
 
     if max_results:
         sql += " LIMIT ?"
@@ -107,12 +108,14 @@ def mock_fetch_arxiv_papers(
 
     papers = []
     for row in rows:
-        papers.append({
-            "title": row["title"],
-            "link": f"http://arxiv.org/abs/{row['id']}",
-            "date": datetime.fromisoformat(row["published"]).date(),
-            "abstract": row["abstract"],
-        })
+        papers.append(
+            {
+                "title": row["title"],
+                "link": f"http://arxiv.org/abs/{row['id']}",
+                "date": datetime.fromisoformat(row["published"]).date(),
+                "abstract": row["abstract"],
+            }
+        )
 
     conn.close()
     return papers
@@ -132,18 +135,17 @@ def patch_scraper_for_local_mirror(monkeypatch, files_dir: Path = DEFAULT_FILES_
         def patched_scraper(monkeypatch):
             patch_scraper_for_local_mirror(monkeypatch)
     """
+
     def local_fetch_paper_content(paper_id):
         return mock_fetch_paper_content(paper_id, files_dir)
 
     monkeypatch.setattr(
-        "paperweight.scraper.fetch_paper_content",
-        local_fetch_paper_content
+        "paperweight.scraper.fetch_paper_content", local_fetch_paper_content
     )
 
     # Also patch the retry-decorated wrapper if needed
     monkeypatch.setattr(
-        "paperweight.scraper.fetch_arxiv_papers",
-        mock_fetch_arxiv_papers
+        "paperweight.scraper.fetch_arxiv_papers", mock_fetch_arxiv_papers
     )
 
 
@@ -159,7 +161,7 @@ def __init__(
         page_size: int = 100,
         delay_seconds: float = 3,
         num_retries: int = 3,
-        mirror_path: Path = DEFAULT_MIRROR_PATH
+        mirror_path: Path = DEFAULT_MIRROR_PATH,
     ):
         self.page_size = page_size
         self.delay_seconds = delay_seconds
@@ -174,17 +176,15 @@ def __init__(
             )
 
     def results(
-        self,
-        search: arxiv.Search,
-        offset: int = 0
+        self, search: arxiv.Search, offset: int = 0
     ) -> Generator[arxiv.Result, None, None]:
         """Execute search against local SQLite database."""
         conn = sqlite3.connect(self.mirror_db_path)
         conn.row_factory = sqlite3.Row
         cursor = conn.cursor()
 
-        query_str = getattr(search, 'query', '')
-        id_list = getattr(search, 'id_list', [])
+        query_str = getattr(search, "query", "")
+        id_list = getattr(search, "id_list", [])
 
         sql = "SELECT * FROM papers WHERE 1=1"
         params: List[Any] = []
@@ -211,7 +211,7 @@ def results(
                     params.append(f"%{term}%")
                     params.append(f"%{term}%")
 
-        max_results = getattr(search, 'max_results', None)
+        max_results = getattr(search, "max_results", None)
         if max_results:
             sql += " LIMIT ?"
             params.append(int(max_results))
@@ -231,29 +231,29 @@ class Author:
             def __init__(self, name: str):
                 self.name = name
 
-        authors = [Author(n.strip()) for n in row['authors'].split(',')]
-        paper_id = row['id']
+        authors = [Author(n.strip()) for n in row["authors"].split(",")]
+        paper_id = row["id"]
 
         res = arxiv.Result(
             entry_id=f"http://arxiv.org/abs/{paper_id}",
-            updated=datetime.fromisoformat(row['updated']),
-            published=datetime.fromisoformat(row['published']),
-            title=row['title'],
+            updated=datetime.fromisoformat(row["updated"]),
+            published=datetime.fromisoformat(row["published"]),
+            title=row["title"],
             authors=authors,
-            summary=row['abstract'],
+            summary=row["abstract"],
             comment=None,
             journal_ref=None,
-            doi=row['doi'],
-            primary_category=row['categories'].split(',')[0].strip(),
-            categories=[cat.strip() for cat in row['categories'].split(',')],
-            links=[]
+            doi=row["doi"],
+            primary_category=row["categories"].split(",")[0].strip(),
+            categories=[cat.strip() for cat in row["categories"].split(",")],
+            links=[],
         )
 
         # Monkey-patch download methods to use local files
-        local_pdf_path = row['local_file_path']
-        local_source_path = row['local_source_path']
+        local_pdf_path = row["local_file_path"]
+        local_source_path = row["local_source_path"]
 
-        def mock_download_pdf(dirpath: str = './', filename: str = '') -> str:
+        def mock_download_pdf(dirpath: str = "./", filename: str = "") -> str:
             if not filename:
                 filename = f"{paper_id}.pdf"
             target_path = Path(dirpath) / filename
@@ -263,7 +263,7 @@ def mock_download_pdf(dirpath: str = './', filename: str = '') -> str:
                 return str(target_path)
             raise FileNotFoundError(f"Mock PDF file missing for {paper_id}")
 
-        def mock_download_source(dirpath: str = './', filename: str = '') -> str:
+        def mock_download_source(dirpath: str = "./", filename: str = "") -> str:
             if not filename:
                 filename = f"{paper_id}.tar.gz"
             target_path = Path(dirpath) / filename
@@ -275,6 +275,6 @@ def mock_download_source(dirpath: str = './', filename: str = '') -> str:
 
         res.download_pdf = mock_download_pdf  # type: ignore
         res.download_source = mock_download_source  # type: ignore
-        res.pdf_url = row['pdf_url']
+        res.pdf_url = row["pdf_url"]
 
         return res
diff --git a/src/paperweight/__init__.py b/src/paperweight/__init__.py
@@ -15,11 +15,12 @@
     setup_and_get_papers,
     summarize_scored_papers,
 )
-from paperweight.scraper import get_recent_papers  # noqa: E402
+from paperweight.scraper import ArxivRateLimitError, get_recent_papers  # noqa: E402
 from paperweight.utils import load_config  # noqa: E402
 
 __all__ = [
     "__version__",
+    "ArxivRateLimitError",
     "get_recent_papers",
     "load_config",
     "process_and_summarize_papers",

diff --git a/src/paperweight/analyzer.py b/src/paperweight/analyzer.py
@@ -66,7 +66,9 @@ def get_abstracts(processed_papers, config, *, summary_concurrency=None):
     if analysis_type == "abstract":
         return [paper["abstract"] for paper in processed_papers]
     if analysis_type == "summary":
-        return summarize_papers(processed_papers, config, summary_concurrency=summary_concurrency)
+        return summarize_papers(
+            processed_papers, config, summary_concurrency=summary_concurrency
+        )
     raise ValueError(f"Unknown analysis type: {analysis_type}")
 
 
@@ -115,9 +117,7 @@ def _resolve_triage_model_config(
     analyzer_cfg = full_config.get("analyzer", {})
 
     provider = (
-        triage_cfg.get("llm_provider")
-        or analyzer_cfg.get("llm_provider")
-        or "openai"
+        triage_cfg.get("llm_provider") or analyzer_cfg.get("llm_provider") or "openai"
     ).lower()
     model = triage_cfg.get("model") or _default_model_for_provider(provider)
     api_key = (
@@ -206,7 +206,9 @@ async def _triage_one_paper_async(prompt, pollux_config, *, min_score):
     return _parse_triage_decision(answer, min_score=min_score)
 
 
-async def _run_triage_async(prompts, pollux_config, *, min_score, concurrency=TRIAGE_CONCURRENCY):
+async def _run_triage_async(
+    prompts, pollux_config, *, min_score, concurrency=TRIAGE_CONCURRENCY
+):
     """Run triage prompts concurrently with a semaphore, returning decisions in order."""
     semaphore = asyncio.Semaphore(concurrency)
     total = len(prompts)
@@ -277,11 +279,18 @@ def triage_papers(
 
     prompts = [_build_triage_prompt(paper, profile_text) for paper in papers]
 
-    triage_concurrency = full_config.get("concurrency", {}).get("triage", TRIAGE_CONCURRENCY)
+    triage_concurrency = full_config.get("concurrency", {}).get(
+        "triage", TRIAGE_CONCURRENCY
+    )
 
     try:
         decisions = asyncio.run(
-            _run_triage_async(prompts, pollux_config, min_score=min_score, concurrency=triage_concurrency)
+            _run_triage_async(
+                prompts,
+                pollux_config,
+                min_score=min_score,
+                concurrency=triage_concurrency,
+            )
         )
     except Exception as exc:
         logger.warning(
@@ -357,9 +366,13 @@ async def _summarize_one_paper_async(
     return str(response)
 
 
-def _resolve_summary_model_config(config: Dict[str, Any]) -> tuple[ProviderName, str, str]:
+def _resolve_summary_model_config(
+    config: Dict[str, Any],
+) -> tuple[ProviderName, str, str]:
     llm_provider = (config.get("llm_provider") or "openai").lower().strip()
-    api_key = config.get("api_key") or os.getenv(f"{llm_provider.upper()}_API_KEY") or ""
+    api_key = (
+        config.get("api_key") or os.getenv(f"{llm_provider.upper()}_API_KEY") or ""
+    )
     if llm_provider not in ("openai", "gemini") or not api_key:
         raise ValueError(
             "Summary analyzer requires a valid llm_provider (openai|gemini) and api_key."
@@ -385,7 +398,9 @@ def summarize_papers(  # noqa: C901
     provider, model_name, api_key = _resolve_summary_model_config(config)
     max_input_tokens = _int_setting(config.get("max_input_tokens"), 7000, minimum=500)
     max_input_chars = _int_setting(config.get("max_input_chars"), 20_000, minimum=1000)
-    effective_concurrency = summary_concurrency if summary_concurrency is not None else SUMMARY_CONCURRENCY
+    effective_concurrency = (
+        summary_concurrency if summary_concurrency is not None else SUMMARY_CONCURRENCY
+    )
 
     pollux_config = Config(
         provider=provider,
@@ -399,7 +414,9 @@ def summarize_papers(  # noqa: C901
         ),
     )
 
-    async def _run_summary_batch() -> tuple[List[str | None], List[tuple[int, BaseException]]]:
+    async def _run_summary_batch() -> (
+        tuple[List[str | None], List[tuple[int, BaseException]]]
+    ):
         semaphore = asyncio.Semaphore(effective_concurrency)
         results: List[str | None] = [None] * len(papers)
         failures: List[tuple[int, BaseException]] = []

diff --git a/src/paperweight/db.py b/src/paperweight/db.py
@@ -14,9 +14,7 @@ def is_db_enabled(config: Dict[str, Any]) -> bool:
 
 
 @contextmanager
-def connect_db(
-    db_config: Dict[str, Any], autocommit: bool = False
-) -> Generator:
+def connect_db(db_config: Dict[str, Any], autocommit: bool = False) -> Generator:
     """Create a database connection.
 
     Args: