diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index 70ca3dd..9d5fc9f 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -43,5 +43,5 @@ jobs:
       env:
         # use the same models here to keep ci run duration low
         # by avoiding extra downloads
-        MAI_GENERAL_MODEL_NAME: "openai/llama3.2:3b"
-        MAI_CODE_SPECIALIST_MODEL_NAME: "openai/llama3.2:3b"
+        MANUGENAI_MODEL_NAME: "openai/llama3.2:3b"
+        MANUGENAI_FIGURE_MODEL_NAME: "openai/llama3.2:3b"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9b24fcf..b7dd70b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
       - id: check-yaml
       - id: detect-private-key
   - repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "v2.6.0"
+    rev: "v2.11.1"
     hooks:
       - id: pyproject-fmt
   - repo: https://github.com/citation-file-format/cffconvert
@@ -39,12 +39,12 @@ repos:
       - id: yamllint
         exclude: pre-commit-config.yaml
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.13.0"
+    rev: "v0.14.4"
    hooks:
      - id: ruff-format
      - id: ruff-check
  - repo: https://github.com/rhysd/actionlint
-    rev: v1.7.7
+    rev: v1.7.8
    hooks:
      - id: actionlint
  - repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update
diff --git a/packages/manugen-ai/src/manugen_ai/agents/ai_science_writer/sub_agents/citations/agent.py b/packages/manugen-ai/src/manugen_ai/agents/ai_science_writer/sub_agents/citations/agent.py
index 4f509aa..da368b7 100644
--- a/packages/manugen-ai/src/manugen_ai/agents/ai_science_writer/sub_agents/citations/agent.py
+++ b/packages/manugen-ai/src/manugen_ai/agents/ai_science_writer/sub_agents/citations/agent.py
@@ -1,6 +1,6 @@
 """
 An agent workflow to enhance content
-using openalex.
+using openalex, with evidence-card synthesis.
 """
 
 from __future__ import annotations
@@ -19,14 +19,14 @@
 parse_list_tool = FunctionTool(func=parse_list)
 oa_search_tool = FunctionTool(func=openalex_query)
 
-# Extract free-text topics
+# 1) Extract free-text topics
 agent_extract_topics = Agent(
     model=LLM,
     name="extract_topics",
-    description="Extract 3–5 key research topics from the draft; output as bullet points, one per line.",
+    description="Extract 3-5 key research topics from the draft; output as a single comma-separated line.",
     instruction="""
-You get the user’s draft text in the user prompt.
-List the 3–5 most relevant research topics as a comman-separated bullet list like this:
+You get the user's draft text in the user prompt.
+List the 3-5 most relevant research topics as a comma-separated list:
 topic one, topic two, ...
 Return only the topics, no extra commentary or JSON.
 Do NOT comment on the topics or provide explanations.
@@ -34,7 +34,23 @@
     output_key="topics",
 )
 
-# Search OpenAlex
+# 1b) Normalize topics (keeps output a clean comma-separated line)
+agent_normalize_topics = Agent(
+    model=LLM,
+    name="normalize_topics",
+    description="Normalize the comma-separated topics into a unique, cleaned list.",
+    instruction="""
+You receive a comma-separated list of topics in the user prompt.
+- Lowercase
+- Trim whitespace
+- Deduplicate
+- Singularize obvious plurals only if unambiguous
+Return a single comma-separated line. No extra text.
+""",
+    output_key="topics",
+)
+
+# 2) Search OpenAlex (resilient wrapper)
 agent_search_openalex = ResilientToolAgent(
     Agent(
         model=LLM,
@@ -43,7 +59,7 @@
         instruction="""
 Call `openalex_query` with `{topics}`.
 Return the mapping as `search_results` (topic → list of URLs).
-Do NOT provide code to perform this action - you must do it by invoking the tool calls.
+Do NOT provide code to perform this action - invoke the tool calls.
 """,
         tools=[oa_search_tool],
         output_key="search_results",
@@ -51,28 +67,151 @@
     max_retries=3,
 )
 
-# Improve draft
-agent_improve_draft = Agent(
+# 3) Build Evidence Cards (MAP)
+agent_build_evidence_cards = Agent(
+    model=LLM,
+    name="build_evidence_cards",
+    description="Create YAML 'evidence cards' from the OpenAlex search results.",
+    instruction="""
+You get `search_results` like: {topic -> [url, ...]}.
+For EACH url, create a YAML card with this exact schema:
+
+- id: "OA<n>"  # <n> increments from 1 across all cards in this run
+  topic: "<topic>"
+  url: "<url>"
+  title: "<title>"
+  authors: ["Last, First", ...]  # best-effort; omit if unknown
+  year: <year>  # best-effort; omit if unknown
+  venue: "<venue>"  # best-effort; omit if unknown
+  key_findings:
+    - "<finding>"
+    - "<finding>"
+  relevance: "<1-2 sentences on why this supports/extends the draft>"
+  # include this only if a short verbatim excerpt is present:
+  # quote: "<verbatim excerpt>"
+
+Rules:
+- Include only facts supported by available metadata/abstract/snippet.
+- If a field is unknown, omit it rather than guessing.
+- De-duplicate identical URLs across topics: keep one card under the most relevant topic.
+Return ONE YAML list only, with no extra commentary.
+""",
+    output_key="evidence_cards",
+)
+
+# 4) Topic Synthesis (REDUCE)
+agent_topic_synthesis = Agent(
+    model=LLM,
+    name="topic_synthesis",
+    description="Group evidence cards by topic and produce a concise synthesis per topic.",
+    instruction="""
+You receive:
+- evidence_cards: a YAML list as defined earlier.
+
+Task:
+- Group cards by `topic`.
+- For each topic, write a compact synthesis with:
+  - 1-3 bullet takeaways that reflect consensus across cards
+  - A "tensions" line if sources disagree
+  - Inline bracket citations using the cards' `id`s, e.g., [OA3, OA7].
+
+Return YAML with this shape:
+
+topics:
+  - topic: "<topic>"
+    synthesis:
+      takeaways:
+        - "<takeaway> [OA#, ...]"
+        - "<takeaway> [OA#, ...]"
+      tensions: "<disagreement, if any> [OA#, ...]"
+    supporting_cards: ["OA#", "OA#", ...]
+
+Return ONLY this YAML (no extra commentary).
+""",
+    output_key="topic_briefs",
+)
+
+# 5) Compose revised draft using evidence (guarded)
+agent_compose_with_evidence = Agent(
     model=LLM,
-    name="improve_draft",
-    description="Rewrite the original draft using insights from `papers`.",
+    name="compose_with_evidence",
+    description="Rewrite the original draft using only supported claims, with inline [#] markers.",
     instruction="""
-You get:
-- Original draft: provided from the user prompt.
-- Fetched papers: `{search_results}`
-Incorporate relevant findings, facts, or citations into the draft.
-Output only the revised draft text.
+Inputs:
+- Original draft (user prompt)
+- topic_briefs (structured YAML)
+- evidence_cards (YAML list with metadata)
+
+Rewrite the draft by:
+- Preserving the author's voice and structure where possible.
+- Adding or revising sentences ONLY if you can attach at least one [#] marker, where # is the numeric part of a card id in evidence_cards (OA3 -> [3]).
+- Avoiding generic claims that lack a marker.
+- Placing [#] immediately after the clause containing the fact.
+
+Output only the revised draft text (no YAML).
+Do NOT include any extra commentary.
+Do NOT invent citations or facts.
+Do NOT provide context on what the other agents or inputs were.
+ONLY return the enhanced draft text.
""", output_key="enhanced_draft", ) -# Full pipeline +# 6) References from cards +agent_format_references = Agent( + model=LLM, + name="format_references", + description="Generate a 'References' section from evidence_cards.", + instruction=""" +You receive the full `evidence_cards` YAML. +Produce a "References" section in plain text: +- One entry per unique card id, sorted by id. +- Format: [#] Authors. Title. Venue, Year. URL +- Omit unknown fields rather than invent them. +Return only the formatted references section. +""", + output_key="references", +) + +# 7) Attribution Check (final pass) +agent_attribution_check = Agent( + model=LLM, + name="attribution_check", + description="Ensure every added factual sentence includes a valid [#] citation present in evidence_cards.", + instruction=""" +Inputs: +- enhanced_draft +- evidence_cards + +Task: +- Flag any sentence that introduces new factual content but lacks [#] or cites an OA id not present. +- If issues exist, output a corrected version of the draft with proper markers and nothing else. +- If no issues, return the input `enhanced_draft`. + +Return only the corrected draft (or the original if already valid). +Do NOT include any extra commentary. +Do NOT invent citations or facts. +Do NOT provide context on what the other agents or inputs were. +ONLY return the corrected draft text (or the original if already valid). +""", + output_key="enhanced_draft", +) + +# Full pipeline — updated root_agent = SequentialAgent( name="citation_agent", - description="Extract topics → search & fetch → improve draft", + description=( + "Extract topics → normalize → search → evidence cards (map) → " + "topic synthesis (reduce) → compose with citations → references → attribution check" + ), sub_agents=[ agent_extract_topics, + agent_normalize_topics, agent_search_openalex, - agent_improve_draft, + agent_build_evidence_cards, + agent_topic_synthesis, + agent_compose_with_evidence, + agent_format_references, + agent_attribution_check, ], ) diff --git a/packages/manugen-ai/src/manugen_ai/utils.py b/packages/manugen-ai/src/manugen_ai/utils.py index 2fc1384..2429e67 100644 --- a/packages/manugen-ai/src/manugen_ai/utils.py +++ b/packages/manugen-ai/src/manugen_ai/utils.py @@ -78,6 +78,13 @@ def get_llm(model_name: str, **kwargs): """ from google.adk.models.lite_llm import LiteLlm + # check that our model_name is not None + if model_name is None: + raise ValueError("model_name cannot be None") + + # normalize model name to lowercase for consistent matching + model_name = str(model_name).lower() + if model_name.startswith(("openai/", "anthropic/")): # kwargs are interpreted as additional arguments to LiteLlm, such as # "response_format=ManuscriptStructure" diff --git a/packages/manugen-ai/tests/test_agents.py b/packages/manugen-ai/tests/test_agents.py index cd5f473..17c0cd2 100644 --- a/packages/manugen-ai/tests/test_agents.py +++ b/packages/manugen-ai/tests/test_agents.py @@ -2,8 +2,13 @@ Tests for various agents """ +import re + import pytest -from manugen_ai.agents.capitalizer.agent import root_agent +from manugen_ai.agents.ai_science_writer.sub_agents.citations.agent import ( + root_agent as citations_agent, +) +from manugen_ai.agents.capitalizer.agent import root_agent as capitalizer_agent from manugen_ai.utils import run_agent_workflow @@ -17,7 +22,7 @@ async def test_agent_capitalizer(): # retry 5 times for attempt in range(5): _, session_state, _ = await run_agent_workflow( - agent=root_agent, + agent=capitalizer_agent, prompt=""" this is a sentence to 
             correct
             """,
@@ -32,3 +37,33 @@
     # Final attempt failed, raise assertion
     assert "output" in session_state.keys()
     assert session_state["output"] == expected_output
+
+
+@pytest.mark.asyncio
+async def test_agent_citations():
+    # retry 5 times
+    for attempt in range(5):
+        _, session_state, _ = await run_agent_workflow(
+            agent=citations_agent,
+            prompt="""
+            CellProfiler is free, open-source software designed to
+            enable biologists without training in computer vision or
+            programming to quantitatively measure phenotypes from
+            thousands of images automatically. More information can
+            be found in the CellProfiler Wiki.
+            """,
+            app_name="app",
+            user_id="user",
+            session_id="001",
+            verbose=True,
+        )
+        # look for at least 3 citations like [1], [2], etc.
+        if (
+            "enhanced_draft" in session_state
+            and len(re.findall(r"\[\d+\]", session_state["enhanced_draft"])) >= 3
+        ):
+            break
+        if attempt == 4:
+            # final attempt failed, raise assertion
+            assert "enhanced_draft" in session_state.keys()
+            assert len(re.findall(r"\[\d+\]", session_state["enhanced_draft"])) >= 3
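
A minimal companion check for the new guard in get_llm, sketched outside the
diff: it assumes get_llm is importable from manugen_ai.utils (as the tests
above already do) and that the google.adk dependency is installed, since
get_llm imports LiteLlm before the guard runs. The exception message matches
the ValueError added in the utils.py hunk.

    import pytest
    from manugen_ai.utils import get_llm

    def test_get_llm_rejects_none_model_name():
        # the new guard raises ValueError when no model name is provided
        with pytest.raises(ValueError, match="model_name cannot be None"):
            get_llm(model_name=None)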