diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..819f743d
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,7 @@
+OPENAI_API_KEY=
+GOOGLE_API_KEY=
+ANTHROPIC_API_KEY=
+PERPLEXITY_API_KEY=
+SEMANTIC_SCHOLAR_KEY=
+FUTURE_HOUSE_API_KEY=
+OPENROUTER_API_KEY=
diff --git a/denario/denario.py b/denario/denario.py
index aa1dd710..f83f6f4a 100644
--- a/denario/denario.py
+++ b/denario/denario.py
@@ -44,7 +44,7 @@ def __init__(self,
         if project_dir is None:
             project_dir = os.path.join( os.getcwd(), DEFAUL_PROJECT_NAME )
         if not os.path.exists(project_dir):
-            os.mkdir(project_dir)
+            os.makedirs(project_dir, exist_ok=True)
 
         if research is None:
             research = Research() # Initialize with default values
@@ -56,12 +56,12 @@ def __init__(self,
         os.makedirs(project_dir, exist_ok=True)
         self.project_dir = project_dir
 
+        self._setup_input_files()
+
         self.plots_folder = os.path.join(self.project_dir, INPUT_FILES, PLOTS_FOLDER)
 
         # Ensure the folder exists
         os.makedirs(self.plots_folder, exist_ok=True)
 
-        self._setup_input_files()
-
         # Get keys from environment if they exist
         self.keys = KeyManager()
         self.keys.get_keys_from_env()
@@ -905,6 +905,59 @@ def referee(self,
         except FileNotFoundError as e:
             print('Denario failed to provide a review for the paper. Ensure that a paper in the `paper` folder ex')
             print(f'Error: {e}')
+
+    def add_literature(self, sections: list[str] = ["idea", "methods", "results"], n_paragraphs: int | None = None) -> None:
+        """
+        Add literature references to the specified sections.
+
+        Args:
+            sections: list of sections to add citations to (choose from "idea", "methods", "results").
+            n_paragraphs: maximum number of paragraphs to process per section.
+        """
+        from .paper_agents.literature import process_tex_file_with_references
+
+        # Start message
+        print(f"--- Adding Literature References to {', '.join(sections)} ---")
+
+        # Mapping between section names and Research attributes
+        attr_map = {"idea": "idea", "methods": "methodology", "results": "results"}
+
+        all_bibs = ""
+        for section in sections:
+            # Check if section exists in research object
+            attr_name = attr_map.get(section, section)
+            content = getattr(self.research, attr_name, None)
+
+            if not content:
+                print(f"Skipping {section} as it is empty or not found.")
+                continue
+
+            print(f"Processing {section}...")
+            new_content, bib = process_tex_file_with_references(content, self.keys, nparagraphs=n_paragraphs)
+
+            # Update internal state
+            setattr(self.research, attr_name, new_content)
+
+            # Accumulate bibliography
+            if bib:
+                all_bibs += bib.strip() + "\n\n"
+
+            # Save the updated section back to input_files
+            file_map = {"idea": IDEA_FILE, "methods": METHOD_FILE, "results": RESULTS_FILE}
+            if section in file_map:
+                save_path = os.path.join(self.project_dir, INPUT_FILES, file_map[section])
+                with open(save_path, 'w', encoding='utf-8') as f:
+                    f.write(new_content)
+                print(f"  - Updated {file_map[section]}")
+
+        # Save combined bibliography to input_files/bibliography.bib
+        if all_bibs:
+            bib_path = os.path.join(self.project_dir, INPUT_FILES, "bibliography.bib")
+            with open(bib_path, 'w', encoding='utf-8') as f:
+                f.write(all_bibs.strip())
+            print("  - Generated bibliography.bib")
+
+        print("[SUCCESS] Literature references added.")
 
     def research_pilot(self, data_description: str | None = None) -> None:
         """Full run of Denario.
 
         It calls the following methods sequentially:
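A minimal usage sketch of the new `add_literature` entry point. This is illustrative only: the project path is a placeholder, and the top-level import assumes `Denario` is re-exported from the package root.

```python
# Illustrative sketch: annotate an existing project with citations.
from denario import Denario  # assumes Denario is re-exported at the package root

den = Denario(project_dir="my_project")
# Annotate two sections, capping the work at 5 paragraphs per section.
den.add_literature(sections=["methods", "results"], n_paragraphs=5)
# Updated section files plus a combined bibliography.bib land in input_files/.
```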
diff --git a/denario/experiment.py b/denario/experiment.py
index ac6c72dd..74ac49c5 100644
--- a/denario/experiment.py
+++ b/denario/experiment.py
@@ -5,6 +5,7 @@
 from .key_manager import KeyManager
 from .prompts.experiment import experiment_planner_prompt, experiment_engineer_prompt, experiment_researcher_prompt
 from .utils import create_work_dir, get_task_result
+from .openrouter_config import build_cmbagent_api_keys
 
 class Experiment:
     """
@@ -79,36 +80,92 @@ def run_experiment(self, data_description: str, **kwargs):
         print(f"Restart at step: {self.restart_at_step}")
         print(f"Hardware constraints: {self.hardware_constraints}")
 
-        results = cmbagent.planning_and_control_context_carryover(data_description,
-                        n_plan_reviews = 1,
-                        max_n_attempts = self.max_n_attempts,
-                        max_plan_steps = self.max_n_steps,
-                        max_rounds_control = 500,
-                        engineer_model = self.engineer_model,
-                        researcher_model = self.researcher_model,
-                        planner_model = self.planner_model,
-                        plan_reviewer_model = self.plan_reviewer_model,
-                        plan_instructions=self.planner_append_instructions,
-                        researcher_instructions=self.researcher_append_instructions,
-                        engineer_instructions=self.engineer_append_instructions,
-                        work_dir = self.experiment_dir,
-                        api_keys = self.api_keys,
-                        restart_at_step = self.restart_at_step,
-                        hardware_constraints = self.hardware_constraints,
-                        default_llm_model = self.orchestration_model,
-                        default_formatter_model = self.formatter_model
-                        )
+        # Check if using OpenRouter (OPENROUTER_API_KEY is set)
+        import os
+        import cmbagent.utils as cmbagent_utils
+
+        use_openrouter = bool(os.getenv("OPENROUTER_API_KEY"))
+        original_get_model_config = None
+
+        if use_openrouter:
+            print("Using OpenRouter for all LLM calls...")
+
+            # Monkey-patch cmbagent's get_model_config to use OpenRouter
+            original_get_model_config = cmbagent_utils.get_model_config
+
+            def openrouter_get_model_config(model, api_keys):
+                """Patched get_model_config that routes through OpenRouter."""
+                from denario.openrouter_config import build_openrouter_config
+                return build_openrouter_config(model)
+
+            cmbagent_utils.get_model_config = openrouter_get_model_config
+
+            # Also patch the module-level reference in cmbagent.cmbagent
+            import cmbagent.cmbagent as cmbagent_mod
+            cmbagent_mod.get_model_config = openrouter_get_model_config
+
+            api_keys = build_cmbagent_api_keys()
+        else:
+            api_keys = self.api_keys
+
+        try:
+            results = cmbagent.planning_and_control_context_carryover(data_description,
+                            n_plan_reviews = 1,
+                            max_n_attempts = self.max_n_attempts,
+                            max_plan_steps = self.max_n_steps,
+                            max_rounds_control = 500,
+                            engineer_model = self.engineer_model,
+                            researcher_model = self.researcher_model,
+                            planner_model = self.planner_model,
+                            plan_reviewer_model = self.plan_reviewer_model,
+                            plan_instructions=self.planner_append_instructions,
+                            researcher_instructions=self.researcher_append_instructions,
+                            engineer_instructions=self.engineer_append_instructions,
+                            work_dir = self.experiment_dir,
+                            api_keys = api_keys,
+                            restart_at_step = self.restart_at_step,
+                            hardware_constraints = self.hardware_constraints,
+                            default_llm_model = self.orchestration_model,
+                            default_formatter_model = self.formatter_model,
+                            )
+        finally:
+            # Restore original function after call
+            if original_get_model_config is not None:
+                cmbagent_utils.get_model_config = original_get_model_config
+                import cmbagent.cmbagent as cmbagent_mod
+                cmbagent_mod.get_model_config = original_get_model_config
 
         chat_history = results['chat_history']
         final_context = results['final_context']
 
         try:
-            task_result = get_task_result(chat_history,'researcher_response_formatter')
+            # Try to get the result from the researcher formatter first
+            task_result = get_task_result(chat_history, 'researcher_response_formatter')
+
+            # Fall back to the executor formatter if the researcher result is missing
+            if task_result is None:
+                print("Researcher result not found, falling back to executor_response_formatter...")
+                task_result = get_task_result(chat_history, 'executor_response_formatter')
+
+            if task_result is None:
+                raise ValueError("Could not find result in chat history from any formatter (researcher or executor).")
+
         except Exception as e:
+            # If we reached this point, we really don't have the task result
            raise e
 
        MD_CODE_BLOCK_PATTERN = r"```[ \t]*(?:markdown)[ \t]*\r?\n(.*)\r?\n[ \t]*```"
-        extracted_results = re.findall(MD_CODE_BLOCK_PATTERN, task_result, flags=re.DOTALL)[0]
-        clean_results = re.sub(r'^\s*\n', '', extracted_results)
+        matches = re.findall(MD_CODE_BLOCK_PATTERN, task_result, flags=re.DOTALL)
+        if not matches:
+            # If no markdown block is found, the whole string may already be the
+            # result, or it may be formatted differently.
+            # Try a simpler extraction or use the whole string.
+            if "##" in task_result:
+                clean_results = re.sub(r'^\s*\n', '', task_result)
+            else:
+                raise ValueError("Could not find markdown content in the task result.")
+        else:
+            extracted_results = matches[0]
+            clean_results = re.sub(r'^\s*\n', '', extracted_results)
+
         self.results = clean_results
         self.plot_paths = final_context['displayed_images']
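The patch/restore dance above is a generic temporary-monkey-patch pattern; a distilled, self-contained sketch of it (the module and attribute below are stand-ins, not cmbagent itself):

```python
# Distilled sketch of the temporary monkey-patch pattern used above:
# swap a module attribute, do the work, and always restore it.
import types

mod = types.SimpleNamespace(get_model_config=lambda model, api_keys: {"model": model})

original = mod.get_model_config
mod.get_model_config = lambda model, api_keys: {"model": f"openrouter/{model}"}
try:
    assert mod.get_model_config("gpt-4o", {})["model"] == "openrouter/gpt-4o"
finally:
    mod.get_model_config = original  # restored even if the body raises
```

The `finally` block is what makes the patch safe: a crash inside `planning_and_control_context_carryover` cannot leave cmbagent permanently rerouted.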
diff --git a/denario/key_manager.py b/denario/key_manager.py
index 3d9acb26..c07391e5 100644
--- a/denario/key_manager.py
+++ b/denario/key_manager.py
@@ -8,6 +8,7 @@ class KeyManager(BaseModel):
     OPENAI: str | None = ""
     PERPLEXITY: str | None = ""
     SEMANTIC_SCHOLAR: str | None = ""
+    OPENROUTER: str | None = ""
 
     def get_keys_from_env(self) -> None:
@@ -18,6 +19,7 @@ def get_keys_from_env(self) -> None:
         self.ANTHROPIC = os.getenv("ANTHROPIC_API_KEY") #not strictly needed
         self.PERPLEXITY = os.getenv("PERPLEXITY_API_KEY") #only for citations
         self.SEMANTIC_SCHOLAR = os.getenv("SEMANTIC_SCHOLAR_KEY") #only for fast semantic scholar
+        self.OPENROUTER = os.getenv("OPENROUTER_API_KEY")
 
     def __getitem__(self, key: str) -> str:
         return getattr(self, key)
diff --git a/denario/langgraph_agents/reader.py b/denario/langgraph_agents/reader.py
index ccee2567..49c5b69f 100644
--- a/denario/langgraph_agents/reader.py
+++ b/denario/langgraph_agents/reader.py
@@ -6,6 +6,7 @@
 
 from .parameters import GraphState
 from ..config import INPUT_FILES, IDEA_FILE, METHOD_FILE, LITERATURE_FILE, REFEREE_FILE, PAPER_FOLDER
+from ..llm import models
 
 def preprocess_node(state: GraphState, config: RunnableConfig):
     """
@@ -17,7 +18,19 @@ def preprocess_node(state: GraphState, config: RunnableConfig):
     #########################################
 
     # set the LLM
-    if 'gemini' in state['llm']['model']:
+    if state["keys"].OPENROUTER:
+        model_name = state['llm']['model']
+        openrouter_model = model_name
+        if model_name in models:
+            if models[model_name].openrouter_name:
+                openrouter_model = models[model_name].openrouter_name
+
+        state['llm']['llm'] = ChatOpenAI(model=openrouter_model,
+                                         temperature=state['llm']['temperature'],
+                                         openai_api_key=state["keys"].OPENROUTER,
+                                         base_url="https://openrouter.ai/api/v1")
+
+    elif 'gemini' in state['llm']['model']:
         state['llm']['llm'] = ChatGoogleGenerativeAI(model=state['llm']['model'],
                                                      temperature=state['llm']['temperature'],
                                                      google_api_key=state["keys"].GEMINI)
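The new OpenRouter branch boils down to pointing LangChain's OpenAI client at OpenRouter's OpenAI-compatible endpoint. A standalone sketch of the same configuration (the key is a placeholder):

```python
# Standalone sketch of the ChatOpenAI configuration the reader nodes now
# build whenever OPENROUTER_API_KEY is set.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="google/gemini-2.5-flash",        # OpenRouter model ID
                 temperature=0.7,
                 openai_api_key="sk-or-...",             # placeholder OPENROUTER_API_KEY
                 base_url="https://openrouter.ai/api/v1")
print(llm.invoke("Say hello.").content)
```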
diff --git a/denario/llm.py b/denario/llm.py
index 012f2553..ccedb2c3 100644
--- a/denario/llm.py
+++ b/denario/llm.py
@@ -9,81 +9,111 @@ class LLM(BaseModel):
     """Maximum output tokens allowed."""
     temperature: float | None
     """Temperature of the model."""
+    openrouter_name: str | None = None
+    """OpenRouter model ID."""
 
 gemini20flash = LLM(name="gemini-2.0-flash",
                     max_output_tokens=8192,
-                    temperature=0.7)
+                    temperature=0.7,
+                    openrouter_name="google/gemini-2.0-flash-exp:free")
 """`gemini-2.0-flash` model."""
 
 gemini25flash = LLM(name="gemini-2.5-flash",
                     max_output_tokens=65536,
-                    temperature=0.7)
+                    temperature=0.7,
+                    openrouter_name="google/gemini-2.5-flash")
 """`gemini-2.5-flash` model."""
 
 gemini25pro = LLM(name="gemini-2.5-pro",
                   max_output_tokens=65536,
-                  temperature=0.7)
+                  temperature=0.7,
+                  openrouter_name="google/gemini-2.5-pro")
 """`gemini-2.5-pro` model."""
 
+gemini3pro_preview = LLM(name="gemini-3-pro-preview",
+                         max_output_tokens=65536,
+                         temperature=0.7,
+                         openrouter_name="google/gemini-3-pro-preview")
+"""`gemini-3-pro-preview` model."""
+
+gemini3flash_preview = LLM(name="gemini-3-flash-preview",
+                           max_output_tokens=65536,
+                           temperature=0.7,
+                           openrouter_name="google/gemini-3-flash-preview")
+"""`gemini-3-flash-preview` model."""
+
 o3mini = LLM(name="o3-mini-2025-01-31",
              max_output_tokens=100000,
-             temperature=None)
+             temperature=None,
+             openrouter_name="openai/o3-mini")
 """`o3-mini` model."""
 
 gpt4o = LLM(name="gpt-4o-2024-11-20",
             max_output_tokens=16384,
-            temperature=0.5)
+            temperature=0.5,
+            openrouter_name="openai/gpt-4o")
 """`gpt-4o` model."""
 
 gpt41 = LLM(name="gpt-4.1-2025-04-14",
             max_output_tokens=16384,
-            temperature=0.5)
+            temperature=0.5,
+            openrouter_name="openai/gpt-4.1")
 """`gpt-4.1` model."""
 
 gpt41mini = LLM(name="gpt-4.1-mini",
                 max_output_tokens=16384,
-                temperature=0.5)
+                temperature=0.5,
+                openrouter_name="openai/gpt-4.1-mini")
 """`gpt-4.1-mini` model."""
 
 gpt4omini = LLM(name="gpt-4o-mini-2024-07-18",
                 max_output_tokens=16384,
-                temperature=0.5)
+                temperature=0.5,
+                openrouter_name="openai/gpt-4o-mini")
 """`gpt-4o-mini` model."""
 
 gpt45 = LLM(name="gpt-4.5-preview-2025-02-27",
             max_output_tokens=16384,
-            temperature=0.5)
+            temperature=0.5,
+            openrouter_name="openai/gpt-4.5-preview")
 """`gpt-4.5-preview` model."""
 
 gpt5 = LLM(name="gpt-5",
            max_output_tokens=128000,
-           temperature=None)
+           temperature=None,
+           openrouter_name="openai/gpt-5")
 """`gpt-5` model."""
 
 gpt5mini = LLM(name="gpt-5-mini",
               max_output_tokens=128000,
-               temperature=None)
+               temperature=None,
+               openrouter_name="openai/gpt-5-mini")
 """`gpt-5-mini` model."""
 
 claude37sonnet = LLM(name="claude-3-7-sonnet-20250219",
                      max_output_tokens=64000,
-                     temperature=0)
+                     temperature=0,
+                     openrouter_name="anthropic/claude-3.7-sonnet")
 """`claude-3-7-sonnet` model."""
 
 claude4opus = LLM(name="claude-opus-4-20250514",
                   max_output_tokens=32000,
-                  temperature=0)
+                  temperature=0,
+                  openrouter_name="anthropic/claude-opus-4")
 """`claude-4-Opus` model."""
 
 claude41opus = LLM(name="claude-opus-4-1-20250805",
                    max_output_tokens=32000,
-                   temperature=0)
+                   temperature=0,
+                   openrouter_name="anthropic/claude-opus-4.1")
 """`claude-4.1-Opus` model."""
 
 models : Dict[str, LLM] = {
     "gemini-2.0-flash" : gemini20flash,
     "gemini-2.5-flash" : gemini25flash,
     "gemini-2.5-pro" : gemini25pro,
+    "gemini-3-pro-preview" : gemini3pro_preview,
+    "gemini-3-flash-preview" : gemini3flash_preview,
     "o3-mini" : o3mini,
     "gpt-4o" : gpt4o,
     "gpt-4.1" : gpt41,
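Note that several `openrouter_name` values above were corrected so each registry entry maps to its own model (e.g. `gemini-2.5-*` no longer fell back to 2.0 IDs, `gpt-4.1*` no longer pointed at `gpt-4-turbo*`, and `gpt-5-mini` / the Opus 4 entries got distinct IDs). The field is consumed by the reader nodes; as a sketch, the lookup reduces to a small helper (this helper is illustrative, not part of the diff):

```python
# Illustrative helper mirroring how the reader nodes translate an internal
# model name to an OpenRouter ID via the new openrouter_name field.
from denario.llm import models

def to_openrouter_id(model_name: str) -> str:
    """Translate an internal model name to its OpenRouter ID, if known."""
    llm = models.get(model_name)
    if llm is not None and llm.openrouter_name:
        return llm.openrouter_name
    return model_name  # unknown names pass through unchanged

assert to_openrouter_id("gpt-4o") == "openai/gpt-4o"
assert to_openrouter_id("some/explicit-id") == "some/explicit-id"
```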
diff --git a/denario/openrouter_config.py b/denario/openrouter_config.py
new file mode 100644
index 00000000..68437974
--- /dev/null
+++ b/denario/openrouter_config.py
@@ -0,0 +1,152 @@
+"""OpenRouter configuration builder for cmbagent compatibility.
+
+This module provides utilities to build cmbagent-compatible LLM configurations
+that route all API calls through OpenRouter, avoiding the need for native
+provider SDKs (like Google Vertex AI).
+"""
+import os
+from typing import Optional
+
+OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+
+# Map internal model names to OpenRouter model IDs
+MODEL_TO_OPENROUTER = {
+    # Gemini models
+    "gemini-3-pro-preview": "google/gemini-3-pro-preview",
+    "gemini-3-flash-preview": "google/gemini-3-flash-preview",
+    "gemini-2.5-pro": "google/gemini-2.5-pro-preview-05-06",
+    "gemini-2.5-flash": "google/gemini-2.5-flash-preview-05-20",
+    # OpenAI models
+    "gpt-4o": "openai/gpt-4o",
+    "gpt-4o-2024-11-20": "openai/gpt-4o-2024-11-20",
+    "gpt-4.1": "openai/gpt-4.1",
+    "gpt-4.1-2025-04-14": "openai/gpt-4.1-2025-04-14",
+    "o3-mini": "openai/o3-mini",
+    "o3-mini-2025-01-31": "openai/o3-mini-2025-01-31",
+    # Claude models
+    "claude-3-opus": "anthropic/claude-3-opus",
+    "claude-3-sonnet": "anthropic/claude-3-sonnet",
+    "claude-3-7-sonnet-20250219": "anthropic/claude-3.7-sonnet",
+}
+
+
+def get_openrouter_model_id(model: str) -> str:
+    """Convert internal model name to OpenRouter model ID.
+
+    Args:
+        model: Internal model name (e.g., "gemini-3-pro-preview")
+
+    Returns:
+        OpenRouter model ID (e.g., "google/gemini-3-pro-preview")
+    """
+    if "/" in model:
+        return model  # Already in OpenRouter format
+    return MODEL_TO_OPENROUTER.get(model, f"openai/{model}")
+
+
+def build_openrouter_config(model: str, api_key: Optional[str] = None) -> dict:
+    """Build cmbagent-compatible config for OpenRouter.
+
+    Args:
+        model: Model name (internal or OpenRouter format)
+        api_key: OpenRouter API key (defaults to OPENROUTER_API_KEY env var)
+
+    Returns:
+        Config dict compatible with cmbagent's agent_llm_configs
+    """
+    key = api_key or os.getenv("OPENROUTER_API_KEY")
+    openrouter_model = get_openrouter_model_id(model)
+
+    config = {
+        "model": openrouter_model,
+        "api_key": key,
+        "api_type": "openai",
+        "base_url": OPENROUTER_BASE_URL,
+    }
+
+    # Add reasoning_effort for o3 models
+    if "o3" in model:
+        config["reasoning_effort"] = "medium"
+
+    return config
+
+
+def build_agent_llm_configs(
+    engineer_model: str,
+    researcher_model: str,
+    planner_model: str,
+    plan_reviewer_model: str,
+    orchestration_model: str,
+    formatter_model: str,
+    api_key: Optional[str] = None,
+) -> dict:
+    """Build complete agent_llm_configs dict for cmbagent.
+
+    This creates configs for all agents used by cmbagent, routing them
+    through OpenRouter.
+
+    Args:
+        engineer_model: Model for engineer agent
+        researcher_model: Model for researcher agent
+        planner_model: Model for planner agent
+        plan_reviewer_model: Model for plan reviewer agent
+        orchestration_model: Model for control/orchestration agents
+        formatter_model: Model for response formatter agents
+        api_key: OpenRouter API key (defaults to OPENROUTER_API_KEY env var)
+
+    Returns:
+        Dict mapping agent names to their LLM configs
+    """
+    key = api_key or os.getenv("OPENROUTER_API_KEY")
+
+    return {
+        # Main agents
+        "engineer": build_openrouter_config(engineer_model, key),
+        "researcher": build_openrouter_config(researcher_model, key),
+        "planner": build_openrouter_config(planner_model, key),
+        "plan_reviewer": build_openrouter_config(plan_reviewer_model, key),
+        "control": build_openrouter_config(orchestration_model, key),
+        "idea_maker": build_openrouter_config(orchestration_model, key),
+        "idea_hater": build_openrouter_config(orchestration_model, key),
+        "camb_context": build_openrouter_config(orchestration_model, key),
+        "plot_judge": build_openrouter_config(formatter_model, key),
+        # Formatters
+        "engineer_response_formatter": build_openrouter_config(formatter_model, key),
+        "researcher_response_formatter": build_openrouter_config(formatter_model, key),
+        "executor_response_formatter": build_openrouter_config(formatter_model, key),
+        "planner_response_formatter": build_openrouter_config(formatter_model, key),
+        "reviewer_response_formatter": build_openrouter_config(formatter_model, key),
+        "idea_maker_response_formatter": build_openrouter_config(formatter_model, key),
+        "idea_hater_response_formatter": build_openrouter_config(formatter_model, key),
+        "summarizer_response_formatter": build_openrouter_config(formatter_model, key),
+        # Other agents
+        "task_improver": build_openrouter_config(formatter_model, key),
+        "task_recorder": build_openrouter_config(orchestration_model, key),
+        "summarizer": build_openrouter_config(orchestration_model, key),
+        "perplexity": build_openrouter_config(formatter_model, key),
+        "aas_keyword_finder": build_openrouter_config(formatter_model, key),
+        "plot_debugger": build_openrouter_config(orchestration_model, key),
+    }
+
+
+def build_cmbagent_api_keys(openrouter_key: Optional[str] = None) -> dict:
+    """Build api_keys dict for cmbagent using OpenRouter.
+
+    CMBAgent expects a dict with OPENAI, GEMINI, ANTHROPIC keys.
+    We map the OpenRouter key to all of them since agent_llm_configs
+    will override the actual usage.
+
+    Args:
+        openrouter_key: OpenRouter API key (defaults to env var)
+
+    Returns:
+        Dict compatible with cmbagent's api_keys parameter
+    """
+    key = openrouter_key or os.getenv("OPENROUTER_API_KEY")
+
+    return {
+        "OPENAI": key,
+        "GEMINI": key,
+        "ANTHROPIC": key,
+        "MISTRAL": key,
+    }
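To make the expected shape concrete, a quick sketch of what `build_openrouter_config` returns, derived directly from the code above (the key value is a placeholder):

```python
from denario.openrouter_config import build_openrouter_config

cfg = build_openrouter_config("gpt-4o", api_key="sk-or-placeholder")
assert cfg == {
    "model": "openai/gpt-4o",
    "api_key": "sk-or-placeholder",
    "api_type": "openai",
    "base_url": "https://openrouter.ai/api/v1",
}
# o3 models additionally carry {"reasoning_effort": "medium"}.
```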
diff --git a/denario/paper_agents/latex.py b/denario/paper_agents/latex.py
index c8843c35..660298b2 100644
--- a/denario/paper_agents/latex.py
+++ b/denario/paper_agents/latex.py
@@ -1,6 +1,7 @@
 import subprocess
 import os
 import re
+import shutil
 from pathlib import Path
 
 from .parameters import GraphState
@@ -92,23 +93,49 @@ def compile_tex_document(state: dict, doc_name: str, doc_folder: str) -> None:
     doc_stem = file_path.stem
     bib_path = os.path.join(state['files']['Temp'], "bibliography.bib")
 
+    def get_latex_command(name, is_final=False):
+        if shutil.which("xelatex"):
+            cmd = ["xelatex"]
+            if is_final:
+                cmd.extend(["-interaction=nonstopmode", "-file-line-error"])
+            cmd.append(name)
+            return cmd, "xelatex"
+        elif shutil.which("tectonic"):
+            return ["tectonic", name], "tectonic"
+        else:
+            raise FileNotFoundError("Neither xelatex nor tectonic found in PATH.")
+
     def run_xelatex(pass_num=None):
-        result = subprocess.run(["xelatex", doc_name], cwd=doc_folder,
-                                input="\n", capture_output=True, text=True)
-        if result.returncode != 0:
-            print("❌", end="", flush=True)
-            clean_files(doc_name, doc_folder)
-            log_output(result)
-            extract_latex_errors(state)
-            return False
-            #raise RuntimeError(f"XeLaTeX failed (pass {pass_num}):\n{result.stderr}")
-        return True
+        try:
+            cmd, engine = get_latex_command(doc_name)
+            result = subprocess.run(cmd, cwd=doc_folder,
+                                    input="\n", capture_output=True, text=True)
+            if result.returncode != 0:
+                print("❌", end="", flush=True)
+                clean_files(doc_name, doc_folder)
+                log_output(result)
+                extract_latex_errors(state)
+                return False
+            return True
+        except FileNotFoundError:
+            print("⚠️", end="", flush=True)
+            with open(state['files']['LaTeX_log'], 'a') as f:
+                f.write("\n[WARNING] LaTeX engine (xelatex/tectonic) not found. Skipping compilation check.\n")
+            return True  # Pretend it succeeded to let the flow continue
 
     def run_bibtex():
-        result = subprocess.run(["bibtex", doc_stem], cwd=doc_folder,
-                                capture_output=True, text=True)
-        if result.returncode != 0:
-            raise RuntimeError(f"BibTeX failed:\n{result.stderr}")
+        try:
+            # Tectonic handles bibliography automatically
+            cmd, engine = get_latex_command(doc_name)
+            if engine == "tectonic":
+                return
+
+            result = subprocess.run(["bibtex", doc_stem], cwd=doc_folder,
+                                    capture_output=True, text=True)
+            if result.returncode != 0:
+                raise RuntimeError(f"BibTeX failed:\n{result.stderr}")
+        except FileNotFoundError:
+            print(" (No bibtex) ", end="", flush=True)
 
     def log_output(result):
         with open(state['files']['LaTeX_log'], 'a') as f:
@@ -117,10 +144,22 @@ def log_output(result):
             f.write("---- STDERR ----\n")
             f.write(result.stderr)
 
+    # Determine engine
+    try:
+        _, engine = get_latex_command(doc_name)
+    except FileNotFoundError:
+        engine = None
+
     # Pass 1
     if not(run_xelatex(pass_num=1)):
         return False
 
+    if engine == "tectonic":
+        # Tectonic is a one-pass compiler
+        print("✅", end="", flush=True)
+        clean_files(doc_name, doc_folder)
+        return True
+
     # Bibliography step if needed
     if os.path.exists(bib_path):
         run_bibtex()
@@ -148,16 +187,39 @@ def compile_latex(state: GraphState, paper_name: str) -> None:
 
     # get the paper stem
     paper_stem = Path(paper_name).stem
 
+    def get_latex_command(name, is_final=False):
+        if shutil.which("xelatex"):
+            cmd = ["xelatex"]
+            if is_final:
+                cmd.extend(["-interaction=nonstopmode", "-file-line-error"])
+            cmd.append(name)
+            return cmd, "xelatex"
+        elif shutil.which("tectonic"):
+            return ["tectonic", name], "tectonic"
+        else:
+            raise FileNotFoundError("Neither xelatex nor tectonic found in PATH.")
+
     def run_xelatex():
-        return subprocess.run(["xelatex", "-interaction=nonstopmode", "-file-line-error", paper_name],
-                              cwd=state['files']['Paper_folder'],
-                              input="\n", capture_output=True,
-                              text=True, check=True)
+        try:
+            cmd, engine = get_latex_command(paper_name, is_final=True)
+            return subprocess.run(cmd,
+                                  cwd=state['files']['Paper_folder'],
+                                  input="\n", capture_output=True,
+                                  text=True, check=True)
+        except FileNotFoundError:
+            print("⚠️ (LaTeX engine missing, PDF not generated)", end="", flush=True)
+            return None
 
     def run_bibtex():
-        subprocess.run(["bibtex", paper_stem],
-                       cwd=state['files']['Paper_folder'],
-                       capture_output=True, text=True)
+        try:
+            cmd, engine = get_latex_command(paper_name)
+            if engine == "tectonic":
+                return
+            subprocess.run(["bibtex", paper_stem],
+                           cwd=state['files']['Paper_folder'],
+                           capture_output=True, text=True)
+        except FileNotFoundError:
+            pass
 
     def log_output(i, result_or_error, is_error=False):
         with open(state['files']['LaTeX_log'], 'a') as f:
@@ -167,6 +229,12 @@ def log_output(i, result_or_error, is_error=False):
             f.write("---- STDERR ----\n")
             f.write(result_or_error.stderr or "")
 
+    # Determine engine
+    try:
+        _, engine = get_latex_command(paper_name)
+    except FileNotFoundError:
+        engine = None
+
     # Try to compile it the first time
     print(f'Compiling {paper_stem}'.ljust(33,'.'), end="", flush=True)
     try:
@@ -176,6 +244,11 @@
         log_output("Pass 1", e, is_error=True)
         print("❌", end="", flush=True)
 
+    if engine == "tectonic":
+        # Tectonic is one-pass
+        print("")
+        return
+
     # if there is bibliography, compile it
     further_iterations = 1
     if os.path.exists(f"{state['files']['Paper_folder']}/bibliography.bib"):
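The engine fallback introduced in both functions can be checked in isolation; a minimal sketch of the detection order (this helper is illustrative, not part of the diff):

```python
# Minimal sketch of the engine-selection order used by get_latex_command:
# prefer xelatex, fall back to tectonic, otherwise fail loudly.
import shutil

def pick_engine() -> str:
    for engine in ("xelatex", "tectonic"):
        if shutil.which(engine):
            return engine
    raise FileNotFoundError("Neither xelatex nor tectonic found in PATH.")

# tectonic resolves bibliographies itself, so bibtex only runs under xelatex.
needs_bibtex = pick_engine() == "xelatex"
```

This is why both `run_bibtex` variants return early for tectonic and why the multi-pass loop is skipped: tectonic re-runs itself until references converge.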
diff --git a/denario/paper_agents/literature.py b/denario/paper_agents/literature.py
index 36bc625e..4e13ca11 100644
--- a/denario/paper_agents/literature.py
+++ b/denario/paper_agents/literature.py
@@ -7,18 +7,22 @@
 def _execute_query(payload, keys: KeyManager):
     """
     Executes a query by sending a POST request to the Perplexity API.
-
-    Args:
-        payload (dict[str, Any]): The payload to send in the API request.
-
-    Returns:
-        PerplexityChatCompletionResponse: Parsed response from the Perplexity API.
     """
     api_key = keys.PERPLEXITY
 
-    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
-    response = requests.post("https://api.perplexity.ai/chat/completions", headers=headers, json=payload).json()
+    if not api_key:
+        print("\n[WARNING] PERPLEXITY_API_KEY not found. Skipping citations.")
+        return None
 
-    return response
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    try:
+        response = requests.post("https://api.perplexity.ai/chat/completions", headers=headers, json=payload, timeout=60)
+        if response.status_code != 200:
+            print(f"\n[WARNING] Perplexity API returned status code {response.status_code}")
+            return None
+        return response.json()
+    except Exception as e:
+        print(f"\n[WARNING] Error calling Perplexity API: {e}")
+        return None
 
 
 def perplexity(para, keys: KeyManager):
@@ -61,21 +65,12 @@ def perplexity(para, keys: KeyManager):
         "search_domain_filter": ["arxiv.org"],
     }
 
     perplexity_response = _execute_query(payload, keys)
+    if perplexity_response is None or "choices" not in perplexity_response:
+        return (para, [])  # Return original text and no citations on failure
+
     content = perplexity_response["choices"][0]["message"]["content"]
-    citations = perplexity_response["citations"]
+    citations = perplexity_response.get("citations", [])
 
     cleaned_response = re.sub(r'<think>.*?</think>\s*', '', content, flags=re.DOTALL)
-
-    def citation_repl(match):
-        # Extract the citation number as a string and convert to an integer.
-        number_str = match.group(1)
-        index = int(number_str) - 1  # Adjust for 0-based indexing
-        if 0 <= index < len(citations):
-            return f'[[{number_str}]({citations[index]})]'
-        # If the citation number is out of bounds, return it unchanged.
-        return match.group(0)
-
-    # Replace all instances of citations in the form [x] using the helper function.
-    # markdown_response = re.sub(r'\[(\d+)\]', citation_repl, cleaned_response)
-    #display(Markdown(markdown_response))
 
     return (cleaned_response, citations)
diff --git a/denario/paper_agents/reader.py b/denario/paper_agents/reader.py
index e7dfcb8c..9fb71d8c 100644
--- a/denario/paper_agents/reader.py
+++ b/denario/paper_agents/reader.py
@@ -11,6 +11,7 @@
 from .parameters import GraphState
 from .latex_presets import journal_dict
 from ..config import INPUT_FILES, IDEA_FILE, METHOD_FILE, RESULTS_FILE, PAPER_FOLDER, PLOTS_FOLDER, LaTeX_DIR
+from ..llm import models
 
 
 def preprocess_node(state: GraphState, config: RunnableConfig):
@@ -19,7 +20,19 @@
     """
 
     # set the LLM
-    if 'gemini' in state['llm']['model']:
+    if state["keys"].OPENROUTER:
+        model_name = state['llm']['model']
+        openrouter_model = model_name
+        if model_name in models:
+            if models[model_name].openrouter_name:
+                openrouter_model = models[model_name].openrouter_name
+
+        state['llm']['llm'] = ChatOpenAI(model=openrouter_model,
+                                         temperature=state['llm']['temperature'],
+                                         openai_api_key=state["keys"].OPENROUTER,
+                                         base_url="https://openrouter.ai/api/v1")
+
+    elif 'gemini' in state['llm']['model']:
         state['llm']['llm'] = ChatGoogleGenerativeAI(model=state['llm']['model'],
                                                      temperature=state['llm']['temperature'],
                                                      google_api_key=state["keys"].GEMINI)
diff --git a/denario/utils.py b/denario/utils.py
index a2556186..739265ef 100644
--- a/denario/utils.py
+++ b/denario/utils.py
@@ -40,7 +40,7 @@ def extract_file_paths(markdown_text):
     """
 
     # Pattern to match file paths in markdown bullet points
-    pattern = r'-\s*([^\n]+\.(?:csv|txt|md|py|json|yaml|yml|xml|html|css|js|ts|tsx|jsx|java|cpp|c|h|hpp|go|rs|php|rb|pl|sh|bat|sql|log))'
+    pattern = r'-\s*([^\n]+\.(?:h5|xlsx|xls|csv|txt|md|py|json|yaml|yml|xml|html|css|js|ts|tsx|jsx|java|cpp|c|h|hpp|go|rs|php|rb|pl|sh|bat|sql|log))'
 
     # Find all matches
     matches = re.findall(pattern, markdown_text, re.IGNORECASE)
@@ -87,11 +87,11 @@ def get_task_result(chat_history, name: str):
     """Get task result from chat history"""
 
     for obj in chat_history[::-1]:
-        if obj['name'] == name:
-            result = obj['content']
-            break
-    task_result = result
-    return task_result
+        # Using .get() to avoid KeyError if 'name' is missing
+        if obj.get('name') == name:
+            return obj.get('content')
+
+    return None
 
 def in_notebook():
     """Check whether the code is run from a Jupyter Notebook or not, to use different display options"""
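The hardened `get_task_result` now tolerates malformed history entries and signals a missing formatter with `None` instead of an unbound-variable error. A behavior sketch with a hand-built chat history (the entries are illustrative):

```python
# Behavior sketch of the hardened get_task_result().
from denario.utils import get_task_result

chat_history = [
    {"content": "entry without a 'name' key"},  # previously raised KeyError
    {"name": "researcher_response_formatter", "content": "```markdown\n## Results\n```"},
]

assert get_task_result(chat_history, "researcher_response_formatter") == "```markdown\n## Results\n```"
assert get_task_result(chat_history, "missing_agent") is None  # previously NameError/UnboundLocalError
```

Returning `None` is what enables the researcher-to-executor formatter fallback added in `experiment.py` above.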