Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,5 @@ jobs:
env:
# use the same models here to keep ci run duration low
# by avoiding extra downloads
MAI_GENERAL_MODEL_NAME: "openai/llama3.2:3b"
MAI_CODE_SPECIALIST_MODEL_NAME: "openai/llama3.2:3b"
MANUGENAI_MODEL_NAME: "openai/llama3.2:3b"
MANUGENAI_FIGURE_MODEL_NAME: "openai/llama3.2:3b"
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ repos:
- id: check-yaml
- id: detect-private-key
- repo: https://github.com/tox-dev/pyproject-fmt
rev: "v2.6.0"
rev: "v2.11.1"
hooks:
- id: pyproject-fmt
- repo: https://github.com/citation-file-format/cffconvert
Expand Down Expand Up @@ -39,12 +39,12 @@ repos:
- id: yamllint
exclude: pre-commit-config.yaml
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.13.0"
rev: "v0.14.4"
hooks:
- id: ruff-format
- id: ruff-check
- repo: https://github.com/rhysd/actionlint
rev: v1.7.7
rev: v1.7.8
hooks:
- id: actionlint
- repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
An agent workflow to enhance content
using openalex.
using openalex, with evidence-card synthesis.
"""

from __future__ import annotations
Expand All @@ -19,22 +19,38 @@
parse_list_tool = FunctionTool(func=parse_list)
oa_search_tool = FunctionTool(func=openalex_query)

# Extract free-text topics
# 1) Extract free-text topics (unchanged, but fixed wording)
agent_extract_topics = Agent(
model=LLM,
name="extract_topics",
description="Extract 35 key research topics from the draft; output as bullet points, one per line.",
description="Extract 3-5 key research topics from the draft; output as a single comma-separated line.",
instruction="""
You get the users draft text in the user prompt.
List the 35 most relevant research topics as a comman-separated bullet list like this:
You get the user's draft text in the user prompt.
List the 3-5 most relevant research topics as a comma-separated list:
topic one, topic two, ...
Return only the topics, no extra commentary or JSON.
Do NOT comment on the topics or provide explanations.
""",
output_key="topics",
)

# Search OpenAlex
# 1b) Normalize topics (keeps output a clean comma-separated line)
agent_normalize_topics = Agent(
model=LLM,
name="normalize_topics",
description="Normalize the comma-separated topics into a unique, cleaned list.",
instruction="""
You receive a comma-separated list of topics in the user prompt.
- Lowercase
- Trim whitespace
- Deduplicate
- Singularize obvious plurals only if unambiguous
Return a single comma-separated line. No extra text.
""",
output_key="topics",
)

# 2) Search OpenAlex (unchanged; resilient wrapper retained)
agent_search_openalex = ResilientToolAgent(
Agent(
model=LLM,
Expand All @@ -43,36 +59,159 @@
instruction="""
Call `openalex_query` with `{topics}`.
Return the mapping as `search_results` (topic → list of URLs).
Do NOT provide code to perform this action - you must do it by invoking the tool calls.
Do NOT provide code to perform this action - invoke the tool calls.
""",
tools=[oa_search_tool],
output_key="search_results",
),
max_retries=3,
)

# Improve draft
agent_improve_draft = Agent(
# 3) Build Evidence Cards (MAP)
agent_build_evidence_cards = Agent(
model=LLM,
name="build_evidence_cards",
description="Create YAML 'evidence cards' from the OpenAlex search results.",
instruction="""
You get `search_results` like: {topic -> [url, ...]}.
For EACH url, create a YAML card with this exact schema:

- id: "OA<number>" # increment from 1 across all cards in this run
topic: "<topic>"
url: "<url>"
title: "<paper title or best available>"
authors: ["Last, First", ...] # best-effort; omit if unknown
year: <yyyy> # best-effort; omit if unknown
venue: "<journal or venue>" # best-effort; omit if unknown
key_findings:
- "<factual finding 1>"
- "<factual finding 2>"
relevance: "<1-2 sentences on why this supports/extends the draft>"
# include this only if a short verbatim excerpt is present:
# quote: "<short verbatim excerpt>"

Rules:
- Include only facts supported by available metadata/abstract/snippet.
- If a field is unknown, omit it rather than guessing.
- De-duplicate identical URLs across topics by making one card and using the most relevant topic.
Return ONE YAML list only, with no extra commentary.
""",
output_key="evidence_cards",
)

# 4) Topic Synthesis (REDUCE)
agent_topic_synthesis = Agent(
model=LLM,
name="topic_synthesis",
description="Group evidence cards by topic and produce a concise synthesis per topic.",
instruction="""
You receive:
- evidence_cards: a YAML list as defined earlier.

Task:
- Group cards by `topic`.
- For each topic, write a compact synthesis with:
- 1-3 bullet takeaways that reflect consensus across cards
- A “tensions” line if sources disagree
- Inline bracket citations using the cards' `id`s, e.g., [OA3, OA7].

Return YAML with this shape:

topics:
- topic: "<topic>"
synthesis:
takeaways:
- "<point> [OA#,...]"
        - "<point> [OA#,...]"
tensions: "<optional sentence> [OA#,...]"
supporting_cards: ["OA#", "OA#", ...]

Return ONLY this YAML (no extra commentary).
""",
output_key="topic_briefs",
)

# 5) Compose revised draft using evidence (GUARDED) — replaces 'agent_improve_draft'
agent_compose_with_evidence = Agent(
model=LLM,
name="improve_draft",
description="Rewrite the original draft using insights from `papers`.",
name="compose_with_evidence",
description="Rewrite the original draft using only supported claims, with inline [#] markers.",
instruction="""
You get:
- Original draft: provided from the user prompt.
- Fetched papers: `{search_results}`
Incorporate relevant findings, facts, or citations into the draft.
Output only the revised draft text.
Inputs:
- Original draft (user prompt)
- topic_briefs (structured YAML)
- evidence_cards (YAML list with metadata)

Rewrite the draft by:
- Preserving the author's voice and structure where possible.
- Adding or revising sentences ONLY if you can attach at least one [#] marker from evidence_cards.
- Avoid generic claims that lack a marker.
- Place [#] immediately after the clause containing the fact.

Output only the revised draft text (no YAML).
Do NOT include any extra commentary.
Do NOT invent citations or facts.
Do NOT provide context on what the other agents or inputs were.
ONLY return the enhanced draft text.
""",
output_key="enhanced_draft",
)

# Full pipeline
# 6) References from cards
agent_format_references = Agent(
model=LLM,
name="format_references",
description="Generate a 'References' section from evidence_cards.",
instruction="""
You receive the full `evidence_cards` YAML.
Produce a "References" section in plain text:
- One entry per unique card id, sorted by id.
- Format: [OA#] Authors. Title. Venue, Year. URL
- Omit unknown fields rather than invent them.
Return only the formatted references section.
""",
output_key="references",
)

# 7) Attribution Check (final pass)
agent_attribution_check = Agent(
model=LLM,
name="attribution_check",
description="Ensure every added factual sentence includes a valid [#] citation present in evidence_cards.",
instruction="""
Inputs:
- enhanced_draft
- evidence_cards

Task:
- Flag any sentence that introduces new factual content but lacks [#] or cites an OA id not present.
- If issues exist, output a corrected version of the draft with proper markers and nothing else.
- If no issues, return the input `enhanced_draft`.

Return only the corrected draft (or the original if already valid).
Do NOT include any extra commentary.
Do NOT invent citations or facts.
Do NOT provide context on what the other agents or inputs were.
ONLY return the corrected draft text (or the original if already valid).
""",
output_key="enhanced_draft",
)

# Full pipeline — updated
root_agent = SequentialAgent(
name="citation_agent",
description="Extract topics → search & fetch → improve draft",
description=(
"Extract topics → normalize → search → evidence cards (map) → "
"topic synthesis (reduce) → compose with citations → references → attribution check"
),
sub_agents=[
agent_extract_topics,
agent_normalize_topics,
agent_search_openalex,
agent_improve_draft,
agent_build_evidence_cards,
agent_topic_synthesis,
agent_compose_with_evidence,
agent_format_references,
agent_attribution_check,
],
)
7 changes: 7 additions & 0 deletions packages/manugen-ai/src/manugen_ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ def get_llm(model_name: str, **kwargs):
"""
from google.adk.models.lite_llm import LiteLlm

# check that our model_name is not None
if model_name is None:
raise ValueError("model_name cannot be None")

# normalize model name to lowercase for consistent matching
model_name = str(model_name).lower()

if model_name.startswith(("openai/", "anthropic/")):
# kwargs are interpreted as additional arguments to LiteLlm, such as
# "response_format=ManuscriptStructure"
Expand Down
38 changes: 36 additions & 2 deletions packages/manugen-ai/tests/test_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
Tests for various agents
"""

import re

import pytest
from manugen_ai.agents.capitalizer.agent import root_agent
from manugen_ai.agents.ai_science_writer.sub_agents.citations.agent import (
root_agent as citations_agent,
)
from manugen_ai.agents.capitalizer.agent import root_agent as capitalizer_agent
from manugen_ai.utils import run_agent_workflow


Expand All @@ -17,7 +22,7 @@ async def test_agent_capitalizer():
# retry 5 times
for attempt in range(5):
_, session_state, _ = await run_agent_workflow(
agent=root_agent,
agent=capitalizer_agent,
prompt="""
this is a sentence to correct
""",
Expand All @@ -32,3 +37,32 @@ async def test_agent_capitalizer():
# Final attempt failed, raise assertion
assert "output" in session_state.keys()
assert session_state["output"] == expected_output


@pytest.mark.asyncio
async def test_agent_citations():
# retry 5 times
for attempt in range(5):
_, session_state, _ = await run_agent_workflow(
agent=citations_agent,
prompt="""
CellProfiler is a free open-source software designed to
enable biologists without training in computer vision or
programming to quantitatively measure phenotypes from
thousands of images automatically. More information can
be found in the CellProfiler Wiki.
""",
app_name="app",
user_id="user",
session_id="001",
verbose=True,
)
# look for at least 3 citations like [1], [2], etc.
if (
"enhanced_draft" in session_state
and len(re.findall(r"\[\d+\]", session_state["enhanced_draft"])) >= 3
):
break
if attempt == 4:
assert "enhanced_draft" in session_state.keys()
assert len(re.findall(r"\[\d+\]", session_state["enhanced_draft"])) >= 3