From 5091a23a1977c0ead137d0e3f1547d1cc78a8ab8 Mon Sep 17 00:00:00 2001 From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com> Date: Fri, 20 Jun 2025 16:11:21 -0400 Subject: [PATCH 1/5] resolve conflict --- Chat.py | 492 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 271 insertions(+), 221 deletions(-) diff --git a/Chat.py b/Chat.py index 978fe73..677191f 100644 --- a/Chat.py +++ b/Chat.py @@ -13,6 +13,7 @@ import sys from pathlib import Path import psycopg2 +import streamlit.components.v1 as components # Set logging level to DEBUG for detailed logs # logging.basicConfig(level=logging.DEBUG) @@ -31,7 +32,7 @@ + # ''' + # st.markdown(css, unsafe_allow_html=True) + # left_col, right_col = st.columns([2, 3]) + # # Initialize selection + # if "selected_idx" not in st.session_state: + # st.session_state.selected_idx = None + + # # LEFT COLUMN: chat messages with inline "Details" buttons + # with left_col: + # st.markdown("### Chat") - # If candidate solutions are provided, display them separately. - if candidate_solutions is not None: - st.markdown("### Candidate Solutions") - for idx, candidate in enumerate(candidate_solutions, start=1): - with st.expander(f"Candidate {idx}: {candidate.get('option', 'Option')}"): - st.markdown(f"**Explanation:** {candidate.get('explanation', '')}") - st.markdown(f"**Pros:** {candidate.get('pros', '')}") - st.markdown(f"**Cons:** {candidate.get('cons', '')}") - # A button to allow the user to refine this candidate solution. - if st.button("Refine this solution", key=f"refine_candidate_{msg_idx}_{idx}"): - # Pre-fill input with candidate details for refinement. - st.session_state["prefilled_input"] = candidate.get("option", "") + " " + candidate.get("explanation", "") - else: - # Display the explanation text normally. - if "count" in explanation and "mean" in explanation and "std" in explanation: - st.code(explanation) - else: - st.markdown(explanation) - # Display intermediate steps if available. 
- if middle_steps: - # self.display_middle_steps(middle_steps) - with st.expander("View Intermediate Steps"): - st.markdown(middle_steps) + # for i, msg in enumerate(st.session_state.get("messages", [])): + # # Render chat bubble + # with st.chat_message(msg.get("role", "assistant")): + # st.markdown(msg.get("content", "")) + # # If assistant message has attachments, show small 'Details' button + # has_images = bool(msg.get("image_paths")) + # has_files = bool(msg.get("file_paths")) + # if msg.get("role") == "assistant" and (has_images or has_files): + # if st.button("Details πŸ”", key=f"details_{i}"): + # st.session_state.selected_idx = i - # Display any generated plots. - for plot_path in plot_paths: - if plot_path and os.path.exists(plot_path): - image = Image.open(plot_path) - file_name = os.path.basename(plot_path) - file_name_no_ext = os.path.splitext(file_name)[0] - st.image(image, caption=file_name_no_ext) + # st.markdown("---") + # # Chat input + # question = st.chat_input("Ask a question about the dataset") + # if question or st.session_state.get("prefilled_input"): + # if st.session_state.get("prefilled_input"): + # question = st.session_state["prefilled_input"] + # st.session_state["prefilled_input"] = None + # self.handle_user_input(st.session_state.get("analysis_file_path", ""), question) + + # # RIGHT COLUMN: show plots/tables for selected message + # with right_col: + # idx = st.session_state.get("selected_idx") + # if idx is None: + # st.info("Click 'Details' on an assistant message to view its plots & tables here.") - # Display file download buttons for any generated files. 
- for file_path in file_paths: - if file_path and os.path.exists(file_path): - - if file_path.lower().endswith(".csv"): - try: - df = pd.read_csv(file_path) - st.markdown(f"Preview of **{os.path.basename(file_path)}**:") - st.dataframe(df) - except Exception as e: - print(f"Error reading CSV file {os.path.basename(file_path)}: {e}") - if file_path.lower().endswith(".tsv"): - try: - df = pd.read_csv(file_path, sep="\t") - st.markdown(f"Preview of **{os.path.basename(file_path)}**:") - st.dataframe(df) - except Exception as e: - print(f"Error reading CSV file {os.path.basename(file_path)}: {e}") - - unique_key = str(uuid.uuid4()) - with open(file_path, "rb") as f: - st.download_button( - label=f"Download {os.path.basename(file_path)}", - data=f, - file_name=os.path.basename(file_path), - key=f"download_{unique_key}" - ) - - bookmark_data = { - "question": st.session_state["messages"][-2]["content"] if len(st.session_state["messages"]) > 1 else "Unknown", - "answer": explanation, - "plots": plot_paths, - "files": file_paths, + # else: + # msg = st.session_state["messages"][idx] + # st.markdown(f"### Details for query: {st.session_state['messages'][idx-1].get('content')}") + # # Show images + # if msg.get("image_paths"): + # st.markdown("#### Images") + # for img in msg["image_paths"]: + # st.image(img) + # # Show data files / tables + # if msg.get("file_paths"): + # st.markdown("#### Tables & Downloads") + # for path in msg["file_paths"]: + # fname = os.path.basename(path) + # if fname.lower().endswith(('.csv', '.tsv')): + # df = pd.read_csv(path, sep="\t" if fname.lower().endswith('tsv') else ",") + # st.dataframe(df) + # with open(path, "rb") as f: + # st.download_button( + # label=f"Download {fname}", data=f, file_name=fname, + # key=f"dl_{idx}_{fname}" + # ) + # # end display_chat_history + + def display_chat_history(self): + # Inject CSS for two scrollable areas + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) - - if 
st.session_state.get("db_available", False): - if not st.session_state.get(f"feedback_submitted_{msg_idx}", False): - col1, col2 = st.columns(2) - # The on_click callback immediately stores the feedback. - col1.button("πŸ‘", key=f"thumbs_up_{msg_idx}", on_click=self.submit_feedback_response, args=("Yes", msg_idx)) - col2.button("πŸ‘Ž", key=f"thumbs_down_{msg_idx}", on_click=self.submit_feedback_response, args=("No", msg_idx)) - else: - st.info("Feedback recorded!") - # Allow the user to add or update an optional comment. - comment = st.text_area("Optional comment:", key=f"feedback_comment_{msg_idx}") - if st.button("Update Comment", key=f"update_comment_{msg_idx}"): - feedback_id = st.session_state.get(f"feedback_id_{msg_idx}") - update_feedback_comment(feedback_id, comment) - st.success("Comment updated!") - - if not candidate_solutions and next_steps_suggestion: - suggestions = [s.strip() for s in next_steps_suggestion.split("\n") if s.strip()] - self.display_suggestion_buttons(suggestions) - st.markdown("Please let me know if you want to proceed with any of the suggestions or ask any other questions.") + left_col, right_col = st.columns([2, 1]) + + if "selected_idx" not in st.session_state: + st.session_state.selected_idx = None + + # LEFT COLUMN: Chat + with left_col: + st.markdown("### Chat") + with st.container(): + st.markdown('
', unsafe_allow_html=True) + + for i, msg in enumerate(st.session_state.get("messages", [])): + with st.chat_message(msg["role"]): + st.markdown(msg["content"]) + if ( + msg["role"] == "assistant" + and (msg.get("image_paths") or msg.get("file_paths")) + ): + if st.button("Details πŸ”", key=f"detail_btn_{i}"): + st.session_state.selected_idx = i + + st.markdown("
", unsafe_allow_html=True) + + st.markdown("---") + question = st.chat_input("Ask a question about the dataset") + if question: + self.handle_user_input( + st.session_state.get("analysis_file_path", ""), question + ) + # RIGHT COLUMN: Details + with right_col: + st.markdown("### Details") + with st.container(): + st.markdown('
', unsafe_allow_html=True) - def display_chat_history(self): - messages = st.session_state.get("messages", []) - - for idx, message in enumerate(messages): - if not message or not message.get("role") or not message.get("content"): - continue - with st.chat_message(message["role"]): - # Display the main content. - if "count" in message.get("content", "") and "mean" in message.get("content", "") and "std" in message.get("content", ""): - st.code(message["content"]) + idx = st.session_state.selected_idx + if idx is None: + st.info("Click 'Details' to view assistant-generated plots and tables.") else: - st.markdown(message["content"]) - - # Display candidate solutions if they exist. - if "candidate_solutions" in message and message["candidate_solutions"]: - st.markdown("### Candidate Solutions") - for c_idx, candidate in enumerate(message["candidate_solutions"], start=1): - with st.expander(f"Candidate {c_idx}: {candidate.get('option', 'Option')}"): - st.markdown(f"**Explanation:** {candidate.get('explanation', '')}") - st.markdown(f"**Pros:** {candidate.get('pros', '')}") - st.markdown(f"**Cons:** {candidate.get('cons', '')}") - if st.button("Refine this solution", key=f"history_refine_candidate_{idx}_{c_idx}"): - prefill = candidate.get("option", "") + " " + candidate.get("explanation", "") - st.session_state["prefilled_input"] = prefill - - # Display intermediate steps if available. - if "middle_steps" in message and message["middle_steps"]: - with st.expander("View Intermediate Steps"): - st.markdown(message["middle_steps"]) - - # Display any generated plots. - if "image_paths" in message: - for plot_path in message["image_paths"]: - if os.path.exists(plot_path): - image = Image.open(plot_path) - file_name = os.path.basename(plot_path) - file_name_no_ext = os.path.splitext(file_name)[0] - st.image(image, caption=file_name_no_ext) - - # Display file download buttons for any generated files. 
- if "file_paths" in message: - for file_path in message["file_paths"]: - if os.path.exists(file_path): - - - if file_path.lower().endswith(".tsv"): - try: - df = pd.read_csv(file_path, sep="\t") - st.markdown(f"Preview of **{os.path.basename(file_path)}**:") - st.dataframe(df) - except Exception as e: - print(f"Error reading CSV file {os.path.basename(file_path)}: {e}") - - if file_path.lower().endswith(".csv"): - try: - df = pd.read_csv(file_path) - st.markdown(f"Preview of **{os.path.basename(file_path)}**:") - st.dataframe(df) - except Exception as e: - print(f"Error reading CSV file {os.path.basename(file_path)}: {e}") - - unique_key = str(uuid.uuid4()) - with open(file_path, "rb") as f: + msg = st.session_state["messages"][idx] + if msg.get("image_paths"): + st.markdown("#### Images") + for p in msg["image_paths"]: + st.image(p) + if msg.get("file_paths"): + st.markdown("#### Tables & Downloads") + for p in msg["file_paths"]: + fname = os.path.basename(p) + df = pd.read_csv(p, sep="\t" if fname.lower().endswith("tsv") else ",") + st.dataframe(df) + with open(p, "rb") as f: st.download_button( - label=f"Download {os.path.basename(file_path)}", - data=f, - file_name=os.path.basename(file_path), - key=f"history_download_{unique_key}" + f"Download {fname}", + f, + file_name=fname, + key=f"dl_{idx}_{fname}", ) - - - if message["role"] == "assistant": - # If feedback hasn't been submitted for this message, show the thumbs buttons. 
- if st.session_state.get("db_available", False): - if not st.session_state.get(f"feedback_submitted_{idx}", False): - col1, col2 = st.columns(2) - col1.button("πŸ‘", key=f"thumbs_up_{idx}", on_click=self.submit_feedback_response, args=("Yes", idx)) - col2.button("πŸ‘Ž", key=f"thumbs_down_{idx}", on_click=self.submit_feedback_response, args=("No", idx)) - - else: - st.info("Feedback recorded!") - comment = st.text_area("Optional comment:", key=f"feedback_comment_{idx}") - if st.button("Update Comment", key=f"update_comment_{idx}"): - feedback_id = st.session_state.get(f"feedback_id_{idx}") - update_feedback_comment(feedback_id, comment) - st.success("Comment updated!") - - if not message.get("bookmarked", False): - # Grab the preceding user message if it exists, else leave blank - prev_q = ( - messages[idx - 1]["content"] - if idx > 0 and messages[idx - 1]["role"] == "user" - else "" - ) - bookmark_data = { - "question": prev_q, - "answer": message["content"], - "plots": message.get("image_paths", []), - "files": message.get("file_paths", []) - } - if st.button("πŸ”– Bookmark this response", key=f"bookmark_{idx}"): - st.session_state["bookmarks"].append(bookmark_data) - # mark in-place so button won’t reappear - st.session_state["messages"][idx]["bookmarked"] = True - self.save_chat_history() - st.rerun() - st.success("Response bookmarked!") - else: - st.markdown("βœ… Bookmarked") - - # Display next steps suggestions. 
- if "next_steps_suggestion" in message and message["next_steps_suggestion"] and idx != len(messages) - 1: - st.markdown(f"**Next Steps Suggestion:** \n* {message['next_steps_suggestion']}") - - if messages: - last_message = messages[-1] - # Only display suggestion buttons if the last message is from the assistant and has suggestions - if last_message["role"] == "assistant" and last_message.get("next_steps_suggestion") and not last_message.get("candidate_solutions"): - suggestions = [s.strip() for s in last_message["next_steps_suggestion"].split("\n") if s.strip()] - self.display_suggestion_buttons(suggestions) + + st.markdown("
", unsafe_allow_html=True) + + + def display_suggestion_buttons(self, suggestions): """Display next step suggestions as clickable links inside the chat.""" @@ -1279,12 +1329,12 @@ def run(self): # if uploaded_file is not None: # df.to_csv("uploaded_dataset.csv", index=False) - user_question = st.chat_input("Ask a question about the dataset") - if user_question or st.session_state.get("prefilled_input"): - if st.session_state.get("prefilled_input"): - user_question = st.session_state["prefilled_input"] - st.session_state["prefilled_input"] = None - self.handle_user_input(st.session_state["analysis_file_path"] , user_question) + # user_question = st.chat_input("Ask a question about the dataset") + # if user_question or st.session_state.get("prefilled_input"): + # if st.session_state.get("prefilled_input"): + # user_question = st.session_state["prefilled_input"] + # st.session_state["prefilled_input"] = None + # self.handle_user_input(st.session_state["analysis_file_path"] , user_question) st.sidebar.markdown("---") st.sidebar.markdown("### Exploratory Data Analysis") From 66a2170ebca6d87e134d31f32c046ae6b838ceea Mon Sep 17 00:00:00 2001 From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com> Date: Fri, 20 Jun 2025 18:46:05 -0400 Subject: [PATCH 2/5] organize and push dspy agent script --- .gitignore | 3 +- Dockerfile => dspy_agent/Dockerfile | 0 dspy_agent/chat_dspy.py | 1806 +++++++++++++++++ dspy_agent/requirements_nicegui_dspy.txt | 34 + favicon.ico => dspy_agent/static/favicon.ico | Bin .../chat_analysis.py | 2 +- config.py => pandasai_agent/config.py | 0 .../requirements_pandas.txt | 0 Chat.py => smolagents_agent/Chat.py | 230 ++- smolagents_agent/chat_nicegui.py | 1557 ++++++++++++++ .../pages}/1_Bookmarks.py | 0 smolagents_agent/prompt.py | 135 ++ .../requirements.txt | 0 smolagents_agent/requirements_nicegui.txt | 23 + tools/chat_analysis.xml | 4 +- 15 files changed, 3762 insertions(+), 32 deletions(-) rename Dockerfile => dspy_agent/Dockerfile (100%) 
create mode 100644 dspy_agent/chat_dspy.py create mode 100644 dspy_agent/requirements_nicegui_dspy.txt rename favicon.ico => dspy_agent/static/favicon.ico (100%) rename chat_analysis.py => pandasai_agent/chat_analysis.py (99%) rename config.py => pandasai_agent/config.py (100%) rename requirements_old.txt => pandasai_agent/requirements_pandas.txt (100%) rename Chat.py => smolagents_agent/Chat.py (87%) create mode 100644 smolagents_agent/chat_nicegui.py rename {pages => smolagents_agent/pages}/1_Bookmarks.py (100%) create mode 100644 smolagents_agent/prompt.py rename requirements.txt => smolagents_agent/requirements.txt (100%) create mode 100644 smolagents_agent/requirements_nicegui.txt diff --git a/.gitignore b/.gitignore index d0bcc30..a028562 100644 --- a/.gitignore +++ b/.gitignore @@ -27,4 +27,5 @@ outputs_dir/ user_config_* *.pkl test*.py -test*.xml \ No newline at end of file +test*.xml +dspy_agent.py diff --git a/Dockerfile b/dspy_agent/Dockerfile similarity index 100% rename from Dockerfile rename to dspy_agent/Dockerfile diff --git a/dspy_agent/chat_dspy.py b/dspy_agent/chat_dspy.py new file mode 100644 index 0000000..4c6d5c4 --- /dev/null +++ b/dspy_agent/chat_dspy.py @@ -0,0 +1,1806 @@ +import os +import re +import pandas as pd +from collections import deque +from dotenv import load_dotenv +import json +import uuid +import logging +import sys +from pathlib import Path +import psycopg2 # Keep for DB functionality if still needed +import asyncio +import argparse +import traceback # For PythonCodeTool +from io import StringIO # For PythonCodeTool + +# NiceGUI imports +from nicegui import ui, app, Client +from nicegui.events import UploadEventArguments +from functools import lru_cache + +import cloudpickle as pickle +from pathlib import Path + +APP_OUTPUT_DIR = Path(os.getenv("APP_OUTPUT_DIR", "outputs_dir")) +SCRIPT_PATH = Path(__file__).resolve().parent + + +try: + APP_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + + dspy_cache_path = APP_OUTPUT_DIR / 
".dspy_cache" + dspy_cache_path.mkdir(parents=True, exist_ok=True) + os.environ["DSPY_CACHEDIR"] = str(dspy_cache_path.resolve()) + + matplotlib_cache_path = APP_OUTPUT_DIR / ".matplotlib_cache" + matplotlib_cache_path.mkdir(parents=True, exist_ok=True) + os.environ["MPLCONFIGDIR"] = str(matplotlib_cache_path.resolve()) + + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s') + logging.info(f"SCRIPT CWD when starting: {Path.cwd()}") + logging.info(f"APP_OUTPUT_DIR resolved to: {APP_OUTPUT_DIR.resolve()}") + logging.info(f"DSPY_CACHE_DIR set to: {os.environ['DSPY_CACHE_DIR']}") + logging.info(f"MPLCONFIGDIR set to: {os.environ['MPLCONFIGDIR']}") + +except Exception as e: + print(f"ERROR during initial cache path setup: {e}", file=sys.stderr) + + +import dspy +from dspy.teleprompt import BootstrapFewShot +logging.info("DSPy imported successfully.") + +# --- Global Constants and Configuration --- +SCRIPT_PATH = Path(__file__).resolve().parent +OPENAI_API_KEY_FILE = Path("user_config_openai.key") +GROQ_API_KEY_FILE = Path("user_config_groq.key") +DEFAULT_outputs_dir = Path("outputs_dir") +AGENT_GENERATED_FILES_SUBDIR = Path("generated_files") +DEFAULT_CHAT_HISTORY_FILE = Path("chat_history_nicegui_dspy.json") +DEFAULT_DSPY_EXAMPLES_FILE = SCRIPT_PATH / Path("examples.json") +MODEL_PRICING = { + "openai/gpt-4o": { + "prompt": 2.50 / 1_000_000, + "cached_prompt": 1.25 / 1_000_000, + "completion": 10.00 / 1_000_000, + }, + "openai/gpt-4.1": { + "prompt": 2.00 / 1_000_000, + "cached_prompt": 0.50 / 1_000_000, + "completion": 8.00 / 1_000_000, + }, + "openai/gpt-4.1-mini": { + "prompt": 0.40 / 1_000_000, + "cached_prompt": 0.10 / 1_000_000, + "completion": 1.60 / 1_000_000, + }, + +} + + + +load_dotenv() +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True) +logging.info("Logging configured successfully.") +# For more detailed DSPy logging during development: +# 
logging.getLogger("dspy").setLevel(logging.DEBUG) + + +# This list of authorized modules is crucial for the security of the code execution tool. +AUTHORIZED_MODULES_FOR_CODE_TOOL = [ + "pandas", "numpy", "matplotlib.pyplot", "seaborn", "scipy.stats", + "pathlib", "io", "sklearn", "autogluon", "random", "joblib", "openpyxl", + "anndata", "Bio", "vcf", "statsmodels", "plotly", +] + +class PythonCodeTool(dspy.Tool): + name = "python_code_executor" + input_variable = "code" + output_variable = "tool_output" + description = ( + "Executes python code for data analysis. " + "The code MUST save any generated files (plots, CSVs) into the 'outputs_dir / \"generated_file\"' directory (e.g., outputs_dir / \"generated_file/plot.png\"). " + "The path to the primary dataset being analyzed is available as 'dataset_path_in_tool_code'. " + "Print statements will be captured as output. The code MUST print the relative path from 'outputs_dir' for any saved file (e.g., print 'generated_file/my_plot.png')." + ) + + def __init__(self, outputs_dir: Path, current_dataset_path: Path | None): + super().__init__(func=self.__call__) + self.outputs_dir = Path(outputs_dir) + self.agent_files_dir = self.outputs_dir / AGENT_GENERATED_FILES_SUBDIR # Uses global constant + self.agent_files_dir.mkdir(parents=True, exist_ok=True) + self.current_dataset_path = Path(current_dataset_path) if current_dataset_path else None + # self.logger is removed + + def __call__(self, code: str) -> str: + logging.info(f"PythonCodeTool: Executing code (first 200 chars): {code[:200]}...") + + # Import the original builtins module to get a reference to the real __import__ + import builtins as _builtins_module + + # Define a custom safe import function + def _custom_safe_import(name, globals_map=None, locals_map=None, fromlist=(), level=0): + + # Check if the module itself or its top-level part is authorized + top_level_module = name.split('.')[0] + is_authorized = False + if top_level_module in 
AUTHORIZED_MODULES_FOR_CODE_TOOL: + is_authorized = True + else: + # Check for cases like 'matplotlib.pyplot' where 'matplotlib.pyplot' is authorized + for auth_mod in AUTHORIZED_MODULES_FOR_CODE_TOOL: + if name == auth_mod or name.startswith(auth_mod + '.'): + is_authorized = True + break + + if is_authorized: + # If authorized, use the real __import__ + return _builtins_module.__import__(name, globals_map, locals_map, fromlist, level) + else: + logging.warning(f"PythonCodeTool: Denied import of '{name}' by custom_safe_import.") + raise ImportError(f"Import of module '{name}' is restricted by the PythonCodeTool environment. " + f"Only modules from the authorized list can be imported: {AUTHORIZED_MODULES_FOR_CODE_TOOL}") + + resolved_outputs_dir = self.outputs_dir.resolve() + restricted_globals = { + "__builtins__": { + "print": print, "range": range, "len": len, "abs": abs, "str": str, + "int": int, "float": float, "list": list, "dict": dict, "set": set, + "tuple": tuple, "zip": zip, "enumerate": enumerate, "sorted": sorted, + "all": all, "any": any, "isinstance": isinstance, "open": open, + "round": round, "sum": sum, "min": min, "max": max, + "getattr": getattr, "hasattr": hasattr, "repr": repr, "callable": callable, + "True": True, "False": False, "None": None, + "Exception": Exception, "ValueError": ValueError, "TypeError": TypeError, + "IndexError": IndexError, "KeyError": KeyError, "AttributeError": AttributeError, + "NameError": NameError, "FileNotFoundError": FileNotFoundError, + "ImportError": ImportError, # Keep Python's ImportError for the exception type + "RuntimeError": RuntimeError, "NotImplementedError": NotImplementedError, + "ZeroDivisionError": ZeroDivisionError, + "__import__": _custom_safe_import, # Crucial: Add our safe import here + }, + "outputs_dir": self.outputs_dir, # Assuming this was corrected to singular based on prev error + "dataset_path_in_tool_code": self.current_dataset_path, + "Path": Path + } + + # if the agent uses 
`pandas.read_csv`. However, LLMs love aliased imports. + for mod_name in AUTHORIZED_MODULES_FOR_CODE_TOOL: + try: + if '.' in mod_name: # e.g. "matplotlib.pyplot" + parts = mod_name.split('.') + imported_mod_obj = _builtins_module.__import__(parts[0], fromlist=parts[1:]) + for part in parts[1:]: # Access submodules like pyplot from matplotlib + imported_mod_obj = getattr(imported_mod_obj, part) + + alias = parts[-1] # e.g., "pyplot" + restricted_globals[alias] = imported_mod_obj + if mod_name == "matplotlib.pyplot": # Common alias + restricted_globals["plt"] = imported_mod_obj + else: # e.g. "pandas" + restricted_globals[mod_name] = _builtins_module.__import__(mod_name) + logging.info(f"PythonCodeTool: Made module '{mod_name}' available directly in exec scope.") + except ImportError as e: + logging.warning(f"PythonCodeTool: Could not pre-import authorized module: {mod_name} - {e}") + + # Secondary check for disallowed imports in the code string (less robust than controlling __import__) + disallowed_imports_in_code = ["os", "subprocess", "sys", "shutil", "requests", "socket", "http", "glob", "tkinter", "pyautogui"] + for disallowed in disallowed_imports_in_code: + # This check is a bit naive, as code could obscure imports (e.g. exec("import os")) + # The _custom_safe_import is the more effective control for direct `import` statements. + if f"import {disallowed}" in code or f"from {disallowed}" in code: + # Check if it's an attempt to import a disallowed top-level module + is_truly_disallowed = True + if disallowed in restricted_globals: # It was explicitly allowed and pre-imported + is_truly_disallowed = False + + if is_truly_disallowed: + logging.warning(f"PythonCodeTool: Code string contains potentially disallowed import pattern: {disallowed}") + + + old_stdout = sys.stdout + sys.stdout = captured_output = StringIO() + full_output_str = "" + try: + exec(code, restricted_globals) # The 'code' (e.g. 
"import pandas as pd") will use _custom_safe_import + stdout_val = captured_output.getvalue() + full_output_str += f"STDOUT:\n{stdout_val}\nExecution successful." + logging.info(f"PythonCodeTool: Execution STDOUT: {stdout_val[:500]}") + except Exception as e: + tb_str = traceback.format_exc() + error_output = f"Execution failed.\nERROR_TYPE: {type(e).__name__}\nERROR_MESSAGE: {str(e)}\nTRACEBACK:\n{tb_str}" + full_output_str += error_output + logging.error(f"PythonCodeTool: Execution error: {error_output[:1000]}") + finally: + sys.stdout = old_stdout + + return full_output_str[:3000] + + +# class DataAnalysisSignature(dspy.Signature): +# """ +# You are an expert data analysis assistant. +# Given a user's question, conversation history (for context), and dataset information (path, type), +# reason step-by-step (Thought) and then use the provided python_code_executor tool (Action with code input) +# to generate and execute Python code for data analysis. +# You don't need to create the outputs_dir. +# The executed Python code should print confirmation of file saves. +# For machine learning tasks, always return performance metrics and always include plots and the model weights file. +# For generated files and plots, always have +# Finally, provide a comprehensive answer to the user in JSON format. This JSON MUST include: +# - "explanation": A textual explanation of what was done and the insights. +# - "plots": A list of relative paths (from 'outputs_dir') to any generated plot image files. +# - "files": A list of relative paths (from 'outputs_dir') to any generated data files (e.g., CSVs). +# - "next_steps_suggestion": A list of 2-3 brief, actionable next step questions or analysis tasks the user might find relevant based on the current findings. 
+# """ +# context = dspy.InputField(desc="Provides context: conversation history, current dataset path, dataset type, and output directory information.") +# question = dspy.InputField(desc="The user's question or data analysis task.") +# final_answer = dspy.OutputField(desc=f"A JSON string with 'explanation', 'plots' (list of strings relative to '{AGENT_GENERATED_FILES_SUBDIR.name}/'), 'files' (list of strings relative to '{AGENT_GENERATED_FILES_SUBDIR.name}/'), and 'next_steps_suggestion' (a list of 2-3 relevant follow-up questions or analysis tasks).") # Ensure AGENT_GENERATED_FILES_SUBDIR is globally defined + +class DataAnalysisSignature(dspy.Signature): + """ + You are an expert data analysis assistant. + Given a user's question, conversation history (for context), and dataset information (path, type), + reason step-by-step (Thought) and then use the provided python_code_executor tool (Action with code input) + to generate and execute Python code for data analysis. + You don't need to create the outputs_dir. + The executed Python code should print confirmation of file saves. + For machine learning tasks, always return performance metrics and always include plots and the model weights file. + + **IMPORTANT: To prevent file conflicts, all generated file and plot names MUST end with a unique suffix (e.g., a short random string or number). For example, save 'plot.png' as 'plot_a8d3.png'.** + + Finally, provide a comprehensive answer to the user in JSON format. This JSON MUST include: + - "explanation": A textual explanation of what was done and the insights. + - "plots": A list of relative paths (from 'outputs_dir') to any generated plot image files. rember to return the paths for all plots generated. + - "files": A list of relative paths (from 'outputs_dir') to any generated data files (e.g., CSVs). + - "next_steps_suggestion": A list of 2-3 brief, actionable next step questions or analysis tasks the user might find relevant based on the current findings. 
+ """ + context = dspy.InputField(desc="Provides context: conversation history, current dataset path, dataset type, and output directory information.") + question = dspy.InputField(desc="The user's question or data analysis task.") + final_answer = dspy.OutputField(desc=f"A JSON string with 'explanation', 'plots' (list of strings relative to '{AGENT_GENERATED_FILES_SUBDIR.name}/'), 'files' (list of strings relative to '{AGENT_GENERATED_FILES_SUBDIR.name}/'), and 'next_steps_suggestion' (a list of 2-3 relevant follow-up questions or analysis tasks).") # Ensure AGENT_GENERATED_FILES_SUBDIR is globally defined + +class DataAnalysisAgentModule(dspy.Module): # Renamed to avoid conflict with smolagents.CodeAgent if it was an object + """The main DSPy agent module for data analysis, using ReAct.""" + def __init__(self, outputs_dir: Path, current_dataset_path: Path | None, max_iters=7): # Max_iters can be tuned + super().__init__() + self.react_agent = dspy.ReAct( + DataAnalysisSignature, + tools=[PythonCodeTool(outputs_dir=outputs_dir, current_dataset_path=current_dataset_path)], + max_iters=max_iters + ) + + def forward(self, question, context): + return self.react_agent(question=question, context=context) + + +def save_key_to_specific_file(file_path: Path, key_value: str): + try: + file_path.parent.mkdir(parents=True, exist_ok=True) + with open(file_path, "w") as f: f.write(key_value) + logging.info(f"API key saved to {file_path}") + except Exception as e: logging.error(f"Error saving API key to {file_path}: {e}", exc_info=True) + +def load_key_from_specific_file(file_path: Path) -> str | None: + try: + if file_path.exists(): + with open(file_path, "r") as f: key = f.read().strip() + if key: logging.info(f"API key loaded from {file_path}"); return key + except Exception as e: logging.error(f"Error loading API key from {file_path}: {e}", exc_info=True) + return None + +def check_db_env_vars(): + required_vars = ["PG_HOST_DA", "PG_DB_DA", "PG_USER_DA", "PG_PASSWORD_DA"] + 
missing_vars = [var for var in required_vars if not os.environ.get(var)] # os.environ is fine here + if missing_vars: logging.warning(f"Missing DB env vars: {missing_vars}"); return False + return True + +def get_db_connection(): + if not check_db_env_vars(): return None + try: + return psycopg2.connect( + host=os.environ["PG_HOST_DA"], database=os.environ["PG_DB_DA"], + user=os.environ["PG_USER_DA"], password=os.environ["PG_PASSWORD_DA"] + ) + except Exception as e: logging.error(f"DB connection failed: {e}"); return None + +def init_feedback_db(): + if not all(os.environ.get(var) for var in ["PG_HOST_DA", "PG_DB_DA", "PG_USER_DA", "PG_PASSWORD_DA"]): + logging.warning("PostgreSQL environment variables not fully set. Feedback DB will not be initialized.") + return False + conn = get_db_connection() + if not conn: logging.error("Cannot init feedback DB: No connection."); return False + try: + with conn.cursor() as cur: + cur.execute(""" + CREATE TABLE IF NOT EXISTS message_feedback ( + id SERIAL PRIMARY KEY, user_id TEXT NOT NULL, question TEXT NOT NULL, + answer TEXT NOT NULL, feedback TEXT NOT NULL, comment TEXT, + dataset_path TEXT, timestamp TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP); + """) + conn.commit() + logging.info("Feedback DB initialized."); return True + except Exception as e: logging.error(f"Error initializing feedback DB table: {e}", exc_info=True); return False + finally: + if conn: conn.close() + + +def load_examples_from_json(json_file_path: Path) -> list[dspy.Example]: + """Loads training examples from a JSON file.""" + examples = [] + if not json_file_path.exists(): + logging.warning(f"Examples JSON file not found: {json_file_path}") + return examples + try: + with open(json_file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + for item in data: + example = dspy.Example( + question=item.get("question"), + context=item.get("context"), + rationale=item.get("rationale"), + final_answer=item.get("final_answer") # This is a JSON string + 
).with_inputs("question", "context") + examples.append(example) + logging.info(f"Loaded {len(examples)} examples from {json_file_path}") + except Exception as e: + logging.error(f"Error loading examples from {json_file_path}: {e}", exc_info=True) + return examples + +def validation_metric(example: dspy.Example, prediction: dspy.Prediction, trace=None) -> bool: + """ + to-do: improve this validation metric to be more robust. + """ + try: + pred_dict = json.loads(prediction.final_answer) + + # Basic check: explanation exists and is non-empty + if "explanation" not in pred_dict or not pred_dict["explanation"]: + logging.debug(f"Validation Fail: Missing or empty explanation. Pred: {str(pred_dict)[:100]}") + return False + + # Basic check: plots and files are lists (even if empty) + if not isinstance(pred_dict.get("plots"), list) or not isinstance(pred_dict.get("files"), list): + logging.debug(f"Validation Fail: Plots/files not lists. Pred: {str(pred_dict)[:100]}") + return False + + # Add more sophisticated checks here (e.g., file existence, content comparison) + # For now, a successfully parsed structure with an explanation is a pass. + logging.debug(f"Validation Pass. Pred: {str(pred_dict)[:100]}") + return True + + except json.JSONDecodeError: + logging.debug(f"Validation Fail: Prediction not valid JSON. Pred: {str(prediction.final_answer)[:200]}") + return False + except Exception as e: + logging.error(f"Validation Metric Error: {e}. Pred: {str(prediction.final_answer)[:200]}", exc_info=True) + return False + + +@lru_cache(maxsize=2) +def get_compiled_dspy_agent( + api_key: str, + model_id_with_prefix: str, + outputs_dir: Path, + current_dataset_path: Path | None, + examples_file_path: Path, + compile_agent: bool = True +): + # 1) configure LM + # In get_compiled_dspy_agent + lm = dspy.LM(model_id_with_prefix, api_key=api_key) + dspy.settings.configure(lm=lm, trace=None) + logging.info(f"DSPy LM configured successfully for {model_id_with_prefix}. 
Tracing set to None (in-memory if used by module).") + + # 2) cache next to the script + script_dir = Path(__file__).resolve().parent + cache_path = script_dir / "compiled_dspy_agent.pkl" + + # 3) try loading + if compile_agent and cache_path.exists(): + try: + with open(cache_path, "rb") as f: + agent = pickle.load(f) + logging.info("Loaded compiled DSPy agent from cache.") + return agent + except Exception as e: + logging.warning(f"Failed to load cached agent ({cache_path}): {e}. Recompiling.") + + # 4) build uncompiled + agent = DataAnalysisAgentModule(outputs_dir=outputs_dir, current_dataset_path=current_dataset_path) + + # 5) compile + pickle + if compile_agent: + examples = load_examples_from_json(examples_file_path) + if examples: + teleprompter = BootstrapFewShot( + metric=validation_metric, + max_bootstrapped_demos=2, + max_labeled_demos=min(len(examples), 4), + ) + try: + compiled = teleprompter.compile(agent, trainset=examples) + # use cloudpickle here + with open(cache_path, "wb") as f: + pickle.dump(compiled, f) + logging.info(f"Compiled DSPy agent and wrote to {cache_path}") + return compiled + except Exception as e: + logging.warning(f"Compilation failed, using uncompiled agent: {e}") + + # 6) fallback + return agent + + + +class NiceGuiApp: + MODEL_OPTIONS_SELECT = { + "openai/gpt-4o": "OpenAI (GPT-4o)", + "openai/gpt-4.1": "OpenAI (GPT-4.1)", + "openai/gpt-4.1-mini": "OpenAI (GPT-4.1-mini)", + "openai/gpt-4o-mini": "OpenAI (GPT-4o-mini)", + "openai/gpt-4-turbo": "OpenAI (GPT-4-Turbo)", + "openai/gpt-3.5-turbo": "OpenAI (GPT-3.5-Turbo)", + "groq/llama3-70b-8192": "Groq (Llama3-70B)", + "groq/mixtral-8x7b-32768": "Groq (Mixtral-8x7B)", + "groq/gemma-7b-it": "Groq (Gemma-7B-IT)" + } + + + def __init__(self, user_id: str, cli_args_ns: argparse.Namespace): + self.user_id = user_id + self.cli_args = cli_args_ns + self.dspy_agent = None # This will be our compiled DSPy agent + self.outputs_dir = Path(self.cli_args.generate_file_path) + (self.outputs_dir / 
AGENT_GENERATED_FILES_SUBDIR).mkdir(parents=True, exist_ok=True) # Ensure subfolder exists + + self.messages = [] + self.memory = deque(maxlen=30) # Keep conversation history for context + self.bookmarks = [] + + self.current_dataset_file_path: Path | None = None + self.current_dataset_display_name = "No dataset loaded" + self.current_input_data_type = self.cli_args.input_data_type + self.current_data_object = None # Loaded data (e.g., DataFrame) + self.summary_stats_csv_path: Path | None = None # For pandas summaries + self.eda_report_path: Path | None = None # Not used with DSPy agent directly unless agent creates it + self.db_available = False # For feedback DB + + self.openai_api_key = "" + self.groq_api_key = "" + self.selected_model_id = "openai/gpt-4o" # Default model + self.selected_model_name = self.MODEL_OPTIONS_SELECT.get(self.selected_model_id, self.selected_model_id) + + self.selected_message_for_details_idx: int | None = None + self.selected_bookmark_for_details: dict | None = None + + # UI element references + self.chat_container: ui.column | None = None + self.dataset_preview_area: ui.column | None = None + self.sidebar_api_status_label: ui.label | None = None + self.details_container: ui.column | None = None + self.openai_key_input: ui.input | None = None + self.groq_key_input: ui.input | None = None + self.model_select_element: ui.select | None = None + self.chat_input_field: ui.input | None = None + self.left_drawer: ui.left_drawer | None = None + self.bookmarks_container: ui.column | None = None + + self.chat_history_file_path = Path(self.cli_args.chat_history_path) + self.dspy_examples_file = Path(self.cli_args.dspy_examples_path) # From CLI + self.initial_dataset_path_from_arg: Path | None = Path(self.cli_args.input_file_path) if self.cli_args.input_file_path else None + + self.compile_dspy_agent_on_startup = self.cli_args.compile_dspy_agent # From CLI + + self.load_initial_state() + + def load_initial_state(self): + cli_openai_path_str = 
self.cli_args.cli_openai_key_file_path + if cli_openai_path_str: + cli_openai_key = load_key_from_specific_file(Path(cli_openai_path_str)) + if cli_openai_key: self.openai_api_key = cli_openai_key; save_key_to_specific_file(OPENAI_API_KEY_FILE, cli_openai_key) + if not self.openai_api_key: self.openai_api_key = load_key_from_specific_file(OPENAI_API_KEY_FILE) or "" + + cli_groq_path_str = self.cli_args.cli_groq_key_file_path + if cli_groq_path_str: + cli_groq_key = load_key_from_specific_file(Path(cli_groq_path_str)) + if cli_groq_key: self.groq_api_key = cli_groq_key; save_key_to_specific_file(GROQ_API_KEY_FILE, cli_groq_key) + if not self.groq_api_key: self.groq_api_key = load_key_from_specific_file(GROQ_API_KEY_FILE) or "" + + if self.chat_history_file_path.exists(): + try: + if self.chat_history_file_path.stat().st_size == 0: + logging.warning(f"Chat history file {self.chat_history_file_path} is empty. Starting with fresh history.") + self.messages = [] + self.memory = deque(maxlen=30) + self.bookmarks = [] + else: + with open(self.chat_history_file_path, "r", encoding="utf-8") as f: history = json.load(f) + self.messages = history.get("messages", []) + self.memory = deque(history.get("memory", []), maxlen=30) # Restore memory + self.bookmarks = history.get("bookmarks", []) + + saved_dataset_path_str = history.get("analysis_file_path") + if saved_dataset_path_str: + saved_dataset_path = Path(saved_dataset_path_str) + if saved_dataset_path.exists(): + self.current_dataset_file_path = saved_dataset_path + self.current_dataset_display_name = self.current_dataset_file_path.name + self.current_input_data_type = history.get("input_data_type", self.current_input_data_type) + + if self.initial_dataset_path_from_arg and self.initial_dataset_path_from_arg.exists(): + self.current_dataset_file_path = self.initial_dataset_path_from_arg + self.current_dataset_display_name = self.current_dataset_file_path.name + self.current_input_data_type = self.cli_args.input_data_type + 
+ summary_path_str = history.get("summary_stats_csv_path"); eda_path_str = history.get("eda_report_path") + if summary_path_str and Path(summary_path_str).exists(): self.summary_stats_csv_path = Path(summary_path_str) + if eda_path_str and Path(eda_path_str).exists(): self.eda_report_path = Path(eda_path_str) + + bookmarked_message_timestamps = {bm.get('assistant_response', {}).get('timestamp') for bm in self.bookmarks if bm.get('assistant_response')} + for msg in self.messages: + if msg.get("role") == "assistant" and msg.get("timestamp") in bookmarked_message_timestamps: + msg['bookmarked'] = True + if msg.get("role") == "assistant": + # any existing cost will be kept; missing ones default to None + msg.setdefault("cost", None) + + logging.info(f"Chat history loaded from {self.chat_history_file_path}") + except Exception as e: logging.error(f"Error loading chat history: {e}", exc_info=True) + self.db_available = init_feedback_db() + + + def save_chat_history(self): + history = { + "messages": self.messages, "memory": list(self.memory), # Save memory + "bookmarks": self.bookmarks, + "analysis_file_path": str(self.current_dataset_file_path) if self.current_dataset_file_path else None, + "input_data_type": self.current_input_data_type, + "summary_stats_csv_path": str(self.summary_stats_csv_path) if self.summary_stats_csv_path else None, + "eda_report_path": str(self.eda_report_path) if self.eda_report_path else None, + } + try: + with open(self.chat_history_file_path, "w", encoding="utf-8") as f: json.dump(history, f, indent=2) + logging.info(f"Chat history (including memory and bookmarks) saved to {self.chat_history_file_path}.") + except Exception as e: logging.error(f"Error saving chat history: {e}", exc_info=True) + + def _get_api_key_for_model(self): + """Determines which API key to use based on the selected model provider.""" + if self.selected_model_id.startswith("groq/"): + return self.groq_api_key + # Default to OpenAI for "openai/" prefix or any other case 
+ return self.openai_api_key + + + def try_initialize_agent(self): + """Initializes or re-initializes the DSPy agent with enhanced error handling, using global logging.""" + status_message = "Agent: Unknown" + status_color = 'grey' + agent_outputs_dir = self.outputs_dir + + # Using global logging + logging.info(f"Attempting to initialize DSPy agent with model: {self.selected_model_id}") + + # Get the appropriate API key + final_api_key = self._get_api_key_for_model() + dspy_model_id_for_config = self.selected_model_id + + if not dspy_model_id_for_config: + self.dspy_agent = None + status_message = "Agent Not Ready: No model selected. Please select a model in the sidebar." + status_color = 'red' + ui.notify(status_message, type='negative', multi_line=True, classes='w-96', auto_close=False, position='center') + logging.error(status_message) # Using global logging + elif not final_api_key: + self.dspy_agent = None + provider_name = "the selected provider" + if dspy_model_id_for_config.startswith("openai/"): + provider_name = "OpenAI" + elif dspy_model_id_for_config.startswith("groq/"): + provider_name = "Groq" + status_message = f"Agent Not Ready: API Key for {provider_name} is missing. Please configure it in the sidebar." + status_color = 'red' + ui.notify(status_message, type='negative', multi_line=True, classes='w-96', auto_close=False, position='center') + logging.error(status_message) # Using global logging + else: + logging.debug(f"API Key present for {dspy_model_id_for_config}. 
Proceeding with DSPy agent initialization.") # Using global logging + try: + logging.info("Compiling DSPy agent with provided API key and model ID...") # Using global logging + self.dspy_agent = get_compiled_dspy_agent( + api_key=final_api_key, + model_id_with_prefix=dspy_model_id_for_config, + outputs_dir=agent_outputs_dir, + current_dataset_path=self.current_dataset_file_path, + examples_file_path=self.dspy_examples_file, + compile_agent=self.compile_dspy_agent_on_startup + ) + + if not self.dspy_agent: + raise RuntimeError("DSPy agent initialization returned None unexpectedly.") + + status_message = f"Agent Ready ({self.selected_model_name})" + status_color = 'green' + + if self.compile_dspy_agent_on_startup and not load_examples_from_json(self.dspy_examples_file): + status_message += " (Uncompiled - No Examples)" + status_color = 'orange' + elif not self.compile_dspy_agent_on_startup: + status_message += " (Uncompiled - By Setting)" + status_color = 'orange' + + ui.notify(status_message, type='positive' if status_color == 'green' else 'warning', timeout=3500, position='top') + logging.info(status_message) # Using global logging + + except Exception as e: + self.dspy_agent = None + # Using global logging + logging.error(f"DSPy Agent initialization/compilation failed for model '{dspy_model_id_for_config}': {e}", exc_info=True) + + error_str = str(e).lower() + auth_keywords = ["authentication", "api key", "invalid key", "permission denied", "unauthorized", "401"] + model_not_found_keywords = ["model_not_found", "does not exist", "404", "no model", "could not find model"] + connection_error_keywords = ["connection", "timeout", "refused", "dns resolution"] + + if any(k in error_str for k in auth_keywords): + status_message = f"Agent Error: API Key for {self.selected_model_name} seems invalid or lacks permissions." 
+ elif any(k in error_str for k in model_not_found_keywords): + status_message = f"Agent Error: Model '{self.selected_model_name}' not found or not accessible." + elif any(k in error_str for k in connection_error_keywords): + status_message = f"Agent Error: Network issue connecting to {self.selected_model_name} provider. Check connection/VPN." + elif "rate limit" in error_str: + status_message = f"Agent Error: Rate limit exceeded for {self.selected_model_name}. Please try again later." + else: + status_message = f"Agent Error: Failed to initialize {self.selected_model_name}. Check server console for details." + + status_color = 'red' + detailed_error_msg = f"{status_message} Details: {str(e)[:150]}..." + ui.notify(detailed_error_msg, type='negative', multi_line=True, classes='w-96 whitespace-pre-wrap', auto_close=False, position='center', close_button='OK') + logging.error(detailed_error_msg) # Using global logging + + if self.sidebar_api_status_label: + self.sidebar_api_status_label.set_text(status_message) + self.sidebar_api_status_label.style(f'color: {status_color}; font-weight: bold; font-size: 0.8rem;') + self.sidebar_api_status_label.tooltip(status_message if len(status_message) > 40 else '') + + return self.dspy_agent is not None + + + async def handle_user_input(self, user_question: str | None): + if not user_question or not user_question.strip(): + ui.notify("Please enter a question.", type='warning') + if self.chat_input_field: self.chat_input_field.set_value(None) + return + + if self.chat_input_field: self.chat_input_field.set_value(None) + + is_load_command = any(keyword in user_question.lower() for keyword in ["load", "upload", "dataset", "file", "open", "use"]) + if not self.current_dataset_file_path and not is_load_command : + ui.notify("Please upload or specify a dataset first, or ask to load one.", type='warning', position='center') + return + + if not self.dspy_agent: + self.try_initialize_agent() # Attempt to initialize if not already + if not 
self.dspy_agent: + ui.notify("Agent not initialized. Please check API keys and model selection in the sidebar.", type='error', position='center', auto_close=False) + return + + self.messages.append({ + "role": "user", + "content": user_question, + "type": "text", + "timestamp": pd.Timestamp.now(tz='UTC').isoformat() + }) + self.memory.append(f"User: {user_question}") # Add to conversation memory + self.update_chat_display() + + spinner_row_to_delete = None + if self.chat_container: + with self.chat_container: + with ui.row().classes('w-full justify-center my-2') as temp_spinner_row: + ui.spinner(size='lg', color='primary') + spinner_row_to_delete = temp_spinner_row + + parsed_response_dict = {"explanation": "Error processing request.", "plots": [], "files": [], "next_steps_suggestion": []} + formatted_middle_steps = "*Agent did not provide detailed steps or an error occurred during generation.*" + prediction = None + trajectory_data = None + total_cost = None + + try: + # Construct context for DSPy agent + memory_history_str = "\n".join(list(self.memory)[-10:]) # Use last 5 user/assistant exchanges for context + dataset_info_str = "No dataset currently loaded." + if self.current_dataset_file_path: + dataset_info_str = ( + f"Current dataset path for tool: '{self.current_dataset_file_path}'.\n" + f"Dataset type: '{self.current_input_data_type}'.\n" + f"Agent's output directory for saving files: '{self.outputs_dir}'. " + f"Tool must save generated files (plots, CSVs) into 'outputs_dir / \"{AGENT_GENERATED_FILES_SUBDIR.name}\"/' " + f"(e.g., outputs_dir / \"{AGENT_GENERATED_FILES_SUBDIR.name}/plot.png\").\n" + f"Code MUST print the relative path from '{self.outputs_dir}' for any saved file (e.g., print 'generated_file/my_plot.png')." 
+ ) + + agent_context = ( + f"CONVERSATION HISTORY (last few exchanges):\n{memory_history_str}\n\n" + f"DATASET INFORMATION:\n{dataset_info_str}\n\n" + f"PYTHON TOOL ('python_code_executor') INFORMATION:\n" + f"- Input: 'code' (a string of Python code to execute).\n" + f"- Output: STDOUT and any errors from execution.\n" + f"- Environment: The code will have 'outputs_dir' (a Path object to '{self.outputs_dir}') " + f"and 'dataset_path_in_tool_code' (a Path object to '{self.current_dataset_file_path}') available.\n" + f"- Saving Files: The code MUST save any generated files (plots, CSVs) into the subdirectory " + f"'{AGENT_GENERATED_FILES_SUBDIR.name}' within 'outputs_dir'. For example, a plot should be saved to " + f"'outputs_dir / \"{AGENT_GENERATED_FILES_SUBDIR.name}\" / \"my_plot.png\"'.\n" + f"- Outputting Paths: For any file saved, the code MUST print its relative path from 'outputs_dir'. " + f"For example: 'print(\"{AGENT_GENERATED_FILES_SUBDIR.name}/my_plot.png\")'. This is crucial for the UI to find the files." 
+ ) + + logging.info(f"Context for DSPy agent (first 300 chars): {agent_context[:300]}...") + + # Update the PythonCodeTool instance with the current dataset path for this run + if self.dspy_agent and hasattr(self.dspy_agent, 'react_agent') and self.dspy_agent.react_agent.tools: + for tool_instance in self.dspy_agent.react_agent.tools: + if isinstance(tool_instance, PythonCodeTool): + tool_instance.current_dataset_path = self.current_dataset_file_path + tool_instance.outputs_dir = self.outputs_dir # Also ensure outputs_dir is current + logging.info(f"Updated PythonCodeTool with dataset: {self.current_dataset_file_path} and outputs_dir: {self.outputs_dir}") + break + + # Run the DSPy agent in a separate thread + with dspy.context(track_usage=True): + prediction = await asyncio.to_thread( + self.dspy_agent, question=user_question, context=agent_context + ) + + # usage_data = prediction.get_lm_usage() + # logging.info(f"Retrieved usage directly from LM: {usage_data}") + # logging.info(f"before usage,DSPy agent prediction received: {prediction}") + # if usage_data: + # logging.info(f"uage:{usage_data}") + # pricing = MODEL_PRICING.get(self.selected_model_id, {}) + # prompt_cost = usage_data.prompt_tokens * pricing.get("prompt", 0) + # completion_cost = usage_data.completion_tokens * pricing.get("completion", 0) + # total_cost = prompt_cost + completion_cost + + logging.info("Calculating cost by manually aggregating from lm.history...") + total_prompt_tokens = 0 + total_completion_tokens = 0 + cost_calculated = False + + # The history is on the configured language model object itself. + lm_history = dspy.settings.lm.history if hasattr(dspy.settings.lm, 'history') else [] + + if lm_history: + for api_call in lm_history: + # According to the documentation, 'usage' is a direct key in each history entry. 
+ usage_data = api_call.get('usage') + if usage_data: + prompt_tokens = usage_data.get("prompt_tokens", 0) + completion_tokens = usage_data.get("completion_tokens", 0) + total_prompt_tokens += prompt_tokens + total_completion_tokens += completion_tokens + + if total_prompt_tokens > 0 or total_completion_tokens > 0: + pricing = MODEL_PRICING.get(self.selected_model_id, {}) + prompt_cost = total_prompt_tokens * pricing.get("prompt", 0) + completion_cost = total_completion_tokens * pricing.get("completion", 0) + total_cost = prompt_cost + completion_cost + logging.info( + f"SUCCESS: Final cost is ${total_cost:.6f} from " + f"({total_prompt_tokens} prompt + {total_completion_tokens} completion tokens)" + ) + cost_calculated = True + + if not cost_calculated: + logging.error("FAILURE: No usage data was found in any lm.history entries after the call.") + + + # --- Enhanced Debugging for Prediction and Trajectory --- + logging.info(f"--- Prediction Object Start ---") + logging.info(f"Type of prediction: {type(prediction)}") + if prediction is not None: + # logging.info(f"Prediction object dir(): {dir(prediction)}") + # try: + # if hasattr(prediction, '__dict__'): + # logging.info(f"Prediction object vars(): {vars(prediction)}") + # else: + # logging.info(f"Prediction raw content: {str(prediction)[:1000]}") + # except TypeError: + # logging.info(f"Prediction raw content (vars() failed): {str(prediction)[:1000]}") + + if hasattr(prediction, 'trajectory') and prediction.trajectory: + logging.info(f"Found prediction.trajectory. Type: {type(prediction.trajectory)}. 
Length: {len(prediction.trajectory) if isinstance(prediction.trajectory, list) else 'N/A'}") + logging.info(f"Prediction.trajectory content: {prediction.trajectory}") + trajectory_data = prediction.trajectory + elif hasattr(prediction, 'rationale') and prediction.rationale: + logging.info(f"Found prediction.rationale: {prediction.rationale}") + trajectory_data = prediction.rationale + else: + logging.warning("Neither .trajectory nor .rationale found on prediction object. Checking LLM history as a fallback.") + # Fallback: Inspect the last LLM interaction if direct trajectory is missing + # This is less ideal for ReAct but can give some clues. + if dspy.settings.lm and hasattr(dspy.settings.lm, 'history') and dspy.settings.lm.history: + logging.info(f"Attempting to use dspy.settings.lm.history. Length: {len(dspy.settings.lm.history)}") + # The history contains more raw request/response pairs. + # For ReAct, the 'trajectory' attribute is preferred. + # This is a simplification; parsing full history is complex. + # We'll pass it to format_raw_middle_steps_for_display which can try to make sense of it. 
+ trajectory_data = dspy.settings.lm.history[-1] if dspy.settings.lm.history else None # Get the very last interaction + if trajectory_data: logging.info(f"Using last item from dspy.settings.lm.history: {trajectory_data}") + + + # Uncomment to print detailed LLM history to console during debugging + # logging.info("--- Full DSPy History (last 3 interactions) ---") + # dspy.inspect_history(n=3, disabled=False) + # logging.info("--- End DSPy History ---") + else: + logging.error("Prediction object from agent is None.") + trajectory_data = None + # --- End Enhanced Debugging --- + + formatted_middle_steps = self.format_raw_middle_steps_for_display(trajectory_data) + + if prediction and hasattr(prediction, 'final_answer') and prediction.final_answer: + parsed_response_dict = self.parse_response_content_for_nicegui(prediction.final_answer) + elif prediction: # If no final_answer but prediction exists + logging.warning("Prediction object does not have 'final_answer' attribute or it's empty. Using str(prediction) as explanation.") + # The prediction itself might be the string output if the signature wasn't fully adhered to + parsed_response_dict = self.parse_response_content_for_nicegui(str(prediction)) + else: # Prediction is None or no useful content + parsed_response_dict = {"explanation": "Agent did not return a valid response.", "plots": [], "files": [], "next_steps_suggestion": []} + logging.error("Agent did not return a usable response (prediction is None or lacks final_answer).") + + except Exception as e: + logging.error(f"Error during DSPy agent interaction for '{user_question}': {e}", exc_info=True) + parsed_response_dict = {"explanation": f"An internal error occurred while processing your request: {str(e)}", "plots": [], "files": [], "next_steps_suggestion": []} + if not formatted_middle_steps or formatted_middle_steps.startswith("*Agent did not"): + formatted_middle_steps = f"*Error during agent execution: {str(e)}*" + finally: + if spinner_row_to_delete: + 
spinner_row_to_delete.delete() + + # --- Path handling for plots and files --- + def make_ui_path(p_str, outputs_dir_base: Path, agent_subdir: Path): + if not isinstance(p_str, str) or not p_str.strip(): + return None + path_obj = Path(p_str) + + # Case 1: Agent returns an absolute path (should not happen if instructed otherwise) + if path_obj.is_absolute(): + try: + # Try to make it relative to the main output directory + rel_path = path_obj.relative_to(outputs_dir_base) + logging.warning(f"Agent returned absolute path '{p_str}', converted to relative '{rel_path}'") + return str(rel_path) + except ValueError: + logging.error(f"Agent returned absolute path '{p_str}' outside of outputs_dir '{outputs_dir_base}'. Cannot process.") + return None # Or handle as error + + # Case 2: Agent returns path already relative to AGENT_GENERATED_FILES_SUBDIR (e.g., "generated_file/plot.png") + if path_obj.parts and path_obj.parts[0] == agent_subdir.name: + return str(path_obj) # Already correct format + + # Case 3: Agent returns path relative to outputs_dir but *not* in AGENT_GENERATED_FILES_SUBDIR (e.g., "plot.png") + # We expect it to be inside AGENT_GENERATED_FILES_SUBDIR as per tool description. + # If it's just a filename, assume it *should* be in the agent's subdir. + if len(path_obj.parts) == 1: # Just a filename like "plot.png" + # Prepend the agent's subdirectory + return str(agent_subdir / path_obj.name) + + # Case 4: Agent returns a path like "subdir_within_generated_file/plot.png" + # This is fine if AGENT_GENERATED_FILES_SUBDIR is the root for such paths. + # For simplicity, we assume agent places files directly in AGENT_GENERATED_FILES_SUBDIR + # or prints paths relative to it. + + logging.warning(f"Path '{p_str}' from agent has unexpected format. 
Assuming it's relative to '{agent_subdir}'.") + return str(agent_subdir / path_obj) # Fallback assumption + + assistant_plots_raw = parsed_response_dict.get("plots", []) + assistant_files_raw = parsed_response_dict.get("files", []) + + assistant_plots = [make_ui_path(p, self.outputs_dir, AGENT_GENERATED_FILES_SUBDIR) for p in assistant_plots_raw if p] + assistant_plots = [p for p in assistant_plots if p] # Filter out Nones + + assistant_files = [make_ui_path(f, self.outputs_dir, AGENT_GENERATED_FILES_SUBDIR) for f in assistant_files_raw if f] + assistant_files = [f for f in assistant_files if f] # Filter out Nones + # --- End Path handling --- + + assistant_message = { + "role": "assistant", + "timestamp": pd.Timestamp.now(tz='UTC').isoformat(), + "content": parsed_response_dict.get("explanation", "No explanation provided."), + "plots": assistant_plots, + "files": assistant_files, + "middle_steps": formatted_middle_steps, + "type": "text_with_attachments", + "next_steps": parsed_response_dict.get("next_steps_suggestion", []), + "cost": total_cost, + } + self.messages.append(assistant_message) + self.memory.append(f"Assistant: {str(assistant_message['content'])[:200]}...") + + self.update_chat_display() + self.save_chat_history() + + new_assistant_message_idx = len(self.messages) - 1 + if assistant_message.get("plots") or assistant_message.get("files") or \ + (assistant_message.get("middle_steps") and not str(assistant_message.get("middle_steps", "")).startswith("*")): + self.show_details_for_message(new_assistant_message_idx) + + def parse_response_content_for_nicegui(self, final_answer_json_str: str | dict ): + """Parses the JSON string from DSPy agent's final_answer field.""" + if isinstance(final_answer_json_str, dict): # Already a dict + # Ensure standard keys + return { + "explanation": final_answer_json_str.get("explanation", "No explanation provided."), + "plots": final_answer_json_str.get("plots", []), + "files": final_answer_json_str.get("files", []), + 
"next_steps_suggestion": final_answer_json_str.get("next_steps_suggestion", []) + } + + if not isinstance(final_answer_json_str, str): + logging.warning(f"DSPy final_answer not a string or dict: {type(final_answer_json_str)}. Content: {str(final_answer_json_str)[:200]}") + return {"explanation": f"Unexpected response format: {str(final_answer_json_str)[:100]}", "plots": [], "files": [], "next_steps_suggestion": []} + + try: + # The final_answer from DSPy Signature is expected to be a well-formed JSON string. + # Sometimes, LLMs might wrap it in ```json ... ``` or add preamble/postamble. + # A robust way is to extract the first valid JSON object. + match = re.search(r"\{.*\}", final_answer_json_str, re.DOTALL) + if match: + json_str_to_parse = match.group(0) + else: + json_str_to_parse = final_answer_json_str # Assume it's already a plain JSON string + + parsed = json.loads(json_str_to_parse) + + # Validate structure + if not isinstance(parsed.get("explanation"), (str, list)): # Allow list for multi-line explanations + logging.warning(f"Parsed JSON from DSPy has unexpected 'explanation' type: {type(parsed.get('explanation'))}") + parsed["explanation"] = str(parsed.get("explanation", "Agent provided an explanation in an unexpected format.")) + if isinstance(parsed.get("explanation"), list): # Join list to string if needed by UI + parsed["explanation"] = "\n".join(parsed["explanation"]) + + if not isinstance(parsed.get("plots"), list): + logging.warning(f"Parsed JSON from DSPy has unexpected 'plots' type: {type(parsed.get('plots'))}") + parsed["plots"] = [] + if not isinstance(parsed.get("files"), list): + logging.warning(f"Parsed JSON from DSPy has unexpected 'files' type: {type(parsed.get('files'))}") + parsed["files"] = [] + if not isinstance(parsed.get("next_steps_suggestion"), list): + parsed["next_steps_suggestion"] = [] + + + return parsed + except json.JSONDecodeError as e: + logging.error(f"Error parsing DSPy agent's final_answer JSON: {e}. 
Content: {final_answer_json_str[:500]}", exc_info=True) + return {"explanation": f"Could not parse LLM's structured response. Raw content: {final_answer_json_str[:200]}", "plots": [], "files": [], "next_steps_suggestion": []} + except Exception as e_gen: + logging.error(f"Generic error parsing DSPy agent's response: {e_gen}. Content: {final_answer_json_str[:500]}", exc_info=True) + return {"explanation": f"Error processing LLM response. Raw content: {final_answer_json_str[:200]}", "plots": [], "files": [], "next_steps_suggestion": []} + + + def format_raw_middle_steps_for_display(self, trajectory_data) -> str: + if not trajectory_data: + return "*No detailed intermediate steps retrieved or trajectory_data is empty.*" + + if isinstance(trajectory_data, str): + return f"#### Agent's Reasoning Log\n```text\n{trajectory_data}\n```" + + # Handle the new dictionary-based trajectory from ReAct + if isinstance(trajectory_data, dict): + logging.info(f"Formatting dictionary-based trajectory: {trajectory_data.keys()}") + formatted_steps_md_parts = ["#### Agent's Workings (Thought, Action, Observation)\n"] + + # Assuming steps are indexed like thought_0, tool_name_0, tool_args_0, observation_0 + # We need to find the maximum index for steps + max_idx = -1 + for key in trajectory_data.keys(): + if key.startswith('thought_') or key.startswith('tool_name_') or key.startswith('tool_args_') or key.startswith('observation_'): + try: + idx = int(key.split('_')[-1]) + if idx > max_idx: + max_idx = idx + except ValueError: + continue + + if max_idx == -1 and 'thought' in trajectory_data: # Simpler, non-indexed ReAct output + current_step_md_parts = [f"\n##### Step 1"] + thought_content = trajectory_data.get("thought") + action_name = trajectory_data.get("action") # Or tool_name + action_input_dict = trajectory_data.get("action_input") # Or tool_args + observation = trajectory_data.get("observation", trajectory_data.get("tool_output")) + + if thought_content: + 
current_step_md_parts.append(f"**Thought:**\n```text\n{str(thought_content).strip()}\n```") + + if action_name and action_input_dict is not None: + action_input_str_parts = [] + if isinstance(action_input_dict, dict): + for key, value in action_input_dict.items(): + lang = 'python' if key == "code" else 'text' + action_input_str_parts.append(f"**Tool Input ({key}):**\n```{lang}\n{str(value).strip()}\n```") + else: + action_input_str_parts.append(f"**Tool Input:**\n```text\n{str(action_input_dict).strip()}\n```") + current_step_md_parts.append(f"**Action Called:** `{action_name}`\n" + "\n".join(action_input_str_parts)) + elif action_name: + current_step_md_parts.append(f"**Action Called:** `{action_name}` (No detailed input logged)") + + if observation is not None: + obs_str = str(observation) + obs_str = (obs_str[:1500] + "\n... (observation truncated)") if len(obs_str) > 1500 else obs_str + current_step_md_parts.append(f"**Observation/Tool Output:**\n```text\n{obs_str.strip()}\n```") + + if len(current_step_md_parts) > 1: + formatted_steps_md_parts.append("\n\n".join(current_step_md_parts)) + + else: # Indexed steps + for i in range(max_idx + 1): + current_step_md_parts = [f"\n##### Step {i + 1}"] + thought_content = trajectory_data.get(f'thought_{i}') or trajectory_data.get(f'rationale_{i}') + action_name = trajectory_data.get(f'tool_name_{i}') or trajectory_data.get(f'action_{i}') + action_input_dict = trajectory_data.get(f'tool_args_{i}') or trajectory_data.get(f'action_input_{i}') + observation = trajectory_data.get(f'observation_{i}') or trajectory_data.get(f'tool_output_{i}') + + if thought_content: + current_step_md_parts.append(f"**Thought:**\n```text\n{str(thought_content).strip()}\n```") + + if action_name and action_input_dict is not None: + action_input_str_parts = [] + if isinstance(action_input_dict, dict): + for key, value in action_input_dict.items(): + lang = 'python' if key == "code" else 'text' + action_input_str_parts.append(f"**Tool Input 
({key}):**\n```{lang}\n{str(value).strip()}\n```") + else: # If action_input_dict is a string (e.g. for some tools) + action_input_str_parts.append(f"**Tool Input:**\n```text\n{str(action_input_dict).strip()}\n```") + current_step_md_parts.append(f"**Action Called:** `{action_name}`\n" + "\n".join(action_input_str_parts)) + elif action_name: + current_step_md_parts.append(f"**Action Called:** `{action_name}` (No detailed input logged)") + + if observation is not None: + obs_str = str(observation) + obs_str = (obs_str[:1500] + "\n... (observation truncated)") if len(obs_str) > 1500 else obs_str + current_step_md_parts.append(f"**Observation/Tool Output:**\n```text\n{obs_str.strip()}\n```") + + if len(current_step_md_parts) > 1: + formatted_steps_md_parts.append("\n\n".join(current_step_md_parts)) + + return "\n\n---\n".join(formatted_steps_md_parts) if len(formatted_steps_md_parts) > 1 else "*No processable steps found in trajectory dictionary.*" + + # Fallback for other types or if the list format is still expected for some cases + if isinstance(trajectory_data, list) and trajectory_data: + # This is just a placeholder to show where it would go. + logging.info("Processing trajectory_data as a list.") + return "*List-based trajectory processing not fully shown here, adapt as needed.*" + + + logging.warning(f"Trajectory data in unexpected format. Type: {type(trajectory_data)}. Content: {str(trajectory_data)[:500]}") + return f"*Trajectory data in unexpected format. Please check server logs for details. Type: {type(trajectory_data)}*" + + + async def on_page_load_actions(self, client: Client): + logging.info(f"Client connected (User: {self.user_id}). 
Loading initial actions.") + dataset_loaded = False + if self.initial_dataset_path_from_arg and self.initial_dataset_path_from_arg.exists(): + self.current_dataset_file_path = self.initial_dataset_path_from_arg + self.current_dataset_display_name = self.current_dataset_file_path.name + self.current_input_data_type = self.cli_args.input_data_type + ui.notify(f"Loading dataset from arg: {self.current_dataset_display_name}", type='info', timeout=2000) + await self.preview_loaded_or_uploaded_dataset() + dataset_loaded = True + elif self.current_dataset_file_path and self.current_dataset_file_path.exists(): + ui.notify(f"Restoring session with: {self.current_dataset_display_name}", type='info', timeout=2000) + await self.preview_loaded_or_uploaded_dataset() + dataset_loaded = True + + if not dataset_loaded and self.dataset_preview_area: + self.dataset_preview_area.clear() + with self.dataset_preview_area: ui.label("Upload dataset or provide via CLI to start.").classes("text-gray-500 m-2") + + self.update_chat_display() + self.update_details_pane() # Ensure this handles new middle_steps format + self.update_sidebar_bookmarks() + self.try_initialize_agent() # Initialize DSPy agent + + def on_page_unload_actions(self, client: Client): + logging.info(f"Client disconnected (User: {self.user_id}). Saving history.") + self.save_chat_history() + + def handle_model_change(self, e): + self.selected_model_id = e.value + self.selected_model_name = self.MODEL_OPTIONS_SELECT.get(self.selected_model_id, self.selected_model_id) + ui.notify(f"Model set to: {self.selected_model_name}", type='info', position='top-right', timeout=2000) + self.try_initialize_agent() # Re-initialize/re-compile agent + + def save_openai_key(self): + if self.openai_key_input: + self.openai_api_key = self.openai_key_input.value or "" + save_key_to_specific_file(OPENAI_API_KEY_FILE, self.openai_api_key) + ui.notify("OpenAI Key " + ("saved." 
if self.openai_api_key else "cleared."), type='positive' if self.openai_api_key else 'info') + self.try_initialize_agent() + + def save_groq_key(self): + if self.groq_key_input: + self.groq_api_key = self.groq_key_input.value or "" + save_key_to_specific_file(GROQ_API_KEY_FILE, self.groq_api_key) + ui.notify("Groq Key " + ("saved." if self.groq_api_key else "cleared."), type='positive' if self.groq_api_key else 'info') + self.try_initialize_agent() + + async def run_eda_action(self): + if not self.current_dataset_file_path: + ui.notify("Dataset not ready for EDA. Please upload a dataset.", type='warning'); return + if not self.dspy_agent: + ui.notify("Agent not ready for EDA. Please check configuration.", type='warning'); return + + eda_user_query = ( + "Perform a comprehensive Exploratory Data Analysis (EDA) on the current dataset. " + "Include: summary statistics, missing value analysis, data type identification, " + "a correlation matrix (with heatmap visualization), distributions for numerical features, " + "and counts for categorical-like features. " + "Conclude with 3-5 key insights derived from this analysis. 
" + ) + ui.notify("Starting Comprehensive EDA...", type='info') + await self.handle_user_input(eda_user_query) + + + def build_ui(self): + ui.add_head_html(""" + + """) + self.left_drawer = ui.left_drawer(elevated=True, top_corner=True, bottom_corner=True)\ + .props('overlay breakpoint=lg').style('background-color: #f4f6f8;')\ + .classes('p-4 w-80 lg:w-96 border-r') + + with ui.header(elevated=True).style('background-color: #303f9f;').classes('items-center text-white q-px-md'): + if self.left_drawer: + ui.button(icon='menu', on_click=self.left_drawer.toggle).props('flat round color=white') + ui.label("Galaxy Chat Analysis").classes("text-xl md:text-2xl font-semibold tracking-wide") + + with self.left_drawer: + with ui.row().classes("w-full items-center justify-between no-wrap mb-2"): + ui.label("Configuration").classes("text-lg font-semibold text-indigo-800") + ui.button(icon='close', on_click=lambda: setattr(self.left_drawer, 'value', False)) \ + .props('flat round dense color=grey-7').tooltip("Close Sidebar") + + self.sidebar_api_status_label = ui.label("Agent: Unknown").classes("mb-3 text-xs p-1 rounded") + self.model_select_element = ui.select(self.MODEL_OPTIONS_SELECT, label="LLM Model", value=self.selected_model_id, on_change=self.handle_model_change).props("outlined dense emit-value map-options").classes("w-full mb-3") + + with ui.expansion("API Keys", icon="key", value=False).classes("w-full mb-3 text-sm"): + self.openai_key_input = ui.input(label="OpenAI API Key", password=True, value=self.openai_api_key, on_change=lambda e: setattr(self, 'openai_api_key', e.value)).props("dense outlined clearable") + ui.button("Save OpenAI", on_click=self.save_openai_key, icon="save").classes("w-full mt-1").props("color=indigo-6 dense size=sm") + self.groq_key_input = ui.input(label="Groq API Key", password=True, value=self.groq_api_key, on_change=lambda e: setattr(self, 'groq_api_key', e.value)).props("dense outlined clearable mt-2") + ui.button("Save Groq", 
on_click=self.save_groq_key, icon="save").classes("w-full mt-1").props("color=indigo-6 dense size=sm") + + ui.separator().classes("my-3") + ui.label("Dataset").classes("text-md font-semibold mb-2 text-indigo-700") + ui.upload(label="Upload New Dataset", auto_upload=True, on_upload=self.handle_upload, max_file_size=200 * 1024 * 1024).props("accept=.csv,.tsv,.h5ad,.xlsx,.xls,.json,.parquet,.h5,.fa,.fasta,.vcf,.gtf,.gff,.bed").classes("w-full mb-3") + + ui.label("Analysis Actions").classes("text-md font-semibold mt-3 mb-2 text-indigo-700") + ui.button("Run Full EDA", on_click=self.run_eda_action, icon="query_stats").classes("w-full mb-1").props("color=deep-purple-6 dense") + ui.separator().classes("my-3") + + with ui.expansion("⭐ Bookmarks", icon="bookmarks", value=True).classes("w-full text-sm"): + self.bookmarks_container = ui.column().classes("w-full max-h-96 overflow-y-auto gap-1") + self.update_sidebar_bookmarks() # Initial population + + # Main layout with splitter + with ui.splitter(value=65, reverse=False, limits=(40,70)).classes('w-full h-[calc(100vh-110px)] no-wrap overflow-hidden') as main_splitter: + with main_splitter.before: # Chat panel + with ui.column().classes("w-full h-full p-0 flex flex-col no-wrap items-stretch overflow-hidden min-h-0"): + self.chat_container = ui.column().classes("w-full flex-grow overflow-y-auto p-2 md:p-3 bg-gray-100 min-h-0") # Chat messages + + # with ui.row().classes("w-full px-2 pt-2 bg-slate-200 items-center border-t flex-shrink-0"): # Input row + # self.chat_input_field = ui.input(placeholder="Ask about the dataset...")\ + # .props("bg-color=white outlined dense clearable rounded").classes("flex-grow")\ + # .on('keydown.enter', lambda: self.handle_user_input(self.chat_input_field.value), throttle=0.5) + # ui.button(icon="send", on_click=lambda: self.handle_user_input(self.chat_input_field.value))\ + # .props("round color=indigo-6 dense unelevated") + + # ui.label("Outputs may require verification.") \ + # .classes("w-full 
text-xs text-gray-600 px-1 pb-1 text-center bg-slate-100 border-t flex-shrink-0") + with ui.column().classes("w-full p-2 bg-slate-200 border-t flex-shrink-0 gap-0"): + + # First item in the column: a row for the input and button + with ui.row().classes("w-full items-center no-wrap"): + self.chat_input_field = ui.input(placeholder="Ask about the dataset...")\ + .props("bg-color=white outlined dense clearable rounded").classes("flex-grow")\ + .on('keydown.enter', lambda: self.handle_user_input(self.chat_input_field.value), throttle=0.5) + ui.button(icon="send", on_click=lambda: self.handle_user_input(self.chat_input_field.value))\ + .props("round color=indigo-6 dense unelevated") + + # Second item in the column: the verification label, now inside the same container + ui.label("Outputs may require verification.") \ + .classes("w-full text-xs text-gray-600 text-center pt-1") + + with main_splitter.after: # Details and Preview panel + with ui.column().classes("w-full h-full items-stretch overflow-y-auto bg-slate-50 p-0"): + ui.label("Details & Preview").classes("text-md font-semibold text-gray-700 sticky top-0 bg-slate-100/95 backdrop-blur-sm z-10 p-3 border-b shadow-sm") + with ui.column().classes("p-2 md:p-3 flex-grow w-full"): + self.details_container = ui.column().classes("w-full flex-grow p-2 border rounded-lg bg-white shadow mt-2 min-h-[200px]") + self.dataset_preview_area = ui.column().classes("w-full mb-3 p-2 border rounded-lg bg-white shadow") + + + # Keyboard listener for closing drawer + ui.keyboard(self._handle_drawer_escape_key) + + app.on_connect(self.on_page_load_actions) + app.on_disconnect(self.on_page_unload_actions) + + + async def handle_upload(self, e: UploadEventArguments): + if not e.content: + ui.notify("No file content.", type='negative') + return + uploaded_filename = e.name + # Determine file type - use suffix or allow user to specify later + self.current_input_data_type = Path(uploaded_filename).suffix.lower().replace('.', '') + if not 
self.current_input_data_type: # Fallback if no suffix + self.current_input_data_type = "csv" # Or ask user + ui.notify(f"Could not determine file type for {uploaded_filename}, assuming CSV. You can change this if needed.", type='warning') + + # Save to outputs_dir/ (not AGENT_GENERATED_FILES_SUBDIR, as this is user upload) + temp_file_path = self.outputs_dir / uploaded_filename + try: + with open(temp_file_path, 'wb') as f: + f.write(e.content.read()) + + self.current_dataset_file_path = temp_file_path + self.current_dataset_display_name = uploaded_filename + + # Update PythonCodeTool's dataset path if agent is already initialized + if self.dspy_agent and hasattr(self.dspy_agent, 'react_agent') and self.dspy_agent.react_agent.tools: + for tool in self.dspy_agent.react_agent.tools: + if isinstance(tool, PythonCodeTool): + tool.current_dataset_path = self.current_dataset_file_path + logging.info(f"PythonCodeTool dataset path updated to: {self.current_dataset_file_path}") + + ui.notify(f"File '{uploaded_filename}' uploaded and set as current dataset.", type='positive') + self.summary_stats_csv_path = None + self.eda_report_path = None + + self.messages.append({ + "role": "system", + "content": f"New dataset loaded: {uploaded_filename}. Its path is '{self.current_dataset_file_path}'. Type: '{self.current_input_data_type}'. 
Please analyze.", + "type": "text", + "timestamp": pd.Timestamp.now(tz='UTC').isoformat() + }) + self.update_chat_display() + await self.preview_loaded_or_uploaded_dataset() + self.save_chat_history() + except Exception as ex: + ui.notify(f"Error processing '{uploaded_filename}': {ex}", type='negative', multi_line=True) + logging.error(f"Upload error for {uploaded_filename}: {ex}", exc_info=True) + self.current_dataset_file_path = None + self.current_dataset_display_name = "No dataset" + await self.preview_loaded_or_uploaded_dataset() # Update preview to show no dataset + + def load_data_object_from_path(self, file_path: Path, data_type: str): + try: + if not file_path or not file_path.exists(): + logging.warning(f"File not found for loading: {file_path}") + return None + logging.info(f"Loading data from {file_path} as type {data_type}") + if data_type == 'csv': return pd.read_csv(file_path) + elif data_type == "tsv": return pd.read_csv(file_path, sep="\t") + elif data_type == "h5ad": import anndata; return anndata.read_h5ad(file_path) + elif data_type in ("xlsx", "xls"): return pd.read_excel(file_path) + # ... (to-do: need include all other data type loaders) ... + else: raise ValueError(f"Unsupported file type for direct load: {data_type}") + except ImportError as ie: + ui.notify(f"Missing library for {data_type}: {ie}. 
Please install it.", type='error', multi_line=True, auto_close=False) + logging.error(f"ImportError loading {file_path.name if file_path else 'N/A'} ({data_type}): {ie}", exc_info=True) + return None + except Exception as e: + ui.notify(f"Error loading {file_path.name if file_path else 'N/A'} ({data_type}): {e}", type='negative', multi_line=True, auto_close=False) + logging.error(f"Load error for {file_path} ({data_type}): {e}", exc_info=True) + return None + + async def preview_loaded_or_uploaded_dataset(self): + # (This method should be largely the same, ensure it uses self.outputs_dir and AGENT_GENERATED_FILES_SUBDIR correctly for summary paths) + if not self.current_dataset_file_path or not self.dataset_preview_area: + if self.dataset_preview_area: + self.dataset_preview_area.clear() + with self.dataset_preview_area: ui.label("No dataset selected or loaded.").classes("text-gray-500 m-2") + self.current_data_object = None # Clear data object + return + + self.dataset_preview_area.clear() + with self.dataset_preview_area: + ui.label(f"Active: {self.current_dataset_display_name} ({self.current_input_data_type.upper()})").classes('text-md font-semibold mb-1') + + self.current_data_object = self.load_data_object_from_path(self.current_dataset_file_path, self.current_input_data_type) + + if self.current_data_object is None: + ui.label("Failed to load data for preview. 
Check file type or content.").classes('text-red-500') + return + + if isinstance(self.current_data_object, pd.DataFrame): + ui.markdown("###### Data Preview (Top 5 Rows)"); + ui.table.from_pandas(self.current_data_object.head(5)).classes('h-[200px] max-h-[200px] overflow-auto w-full bordered').props('dense flat bordered separator=cell') + + # Generate summary if not already present or file missing + summary_dir = self.outputs_dir / AGENT_GENERATED_FILES_SUBDIR + summary_dir.mkdir(parents=True, exist_ok=True) # Ensure subdir for summaries exists + expected_summary_filename_stem = f"summary_stats_for_{Path(self.current_dataset_display_name).stem}" + # Search for existing summary rather than relying on exact UUID name + existing_summaries = list(summary_dir.glob(f"{expected_summary_filename_stem}*.csv")) + + if existing_summaries and existing_summaries[0].exists(): + self.summary_stats_csv_path = existing_summaries[0] + elif not self.summary_stats_csv_path or not self.summary_stats_csv_path.exists(): + self.summary_stats_csv_path = self.generate_and_save_pandas_summary_csv(self.current_data_object) + + if self.summary_stats_csv_path and self.summary_stats_csv_path.exists(): + try: + summary_df = pd.read_csv(self.summary_stats_csv_path, index_col=0) + ui.markdown("###### Summary Statistics").classes('mt-2') + ui.table.from_pandas(summary_df).classes('h-[280px] max-h-[280px] overflow-auto w-full bordered').props('dense flat bordered separator=cell') + ui.button("Download Summary", icon="download", on_click=lambda: ui.download(str(self.summary_stats_csv_path), filename=self.summary_stats_csv_path.name)).props("dense size=sm flat").classes("mt-1 text-sm text-indigo-600 hover:text-indigo-800") + except Exception as e: + ui.notify(f"Error displaying summary: {e}", type='warning') + logging.warning(f"Error displaying summary CSV from {self.summary_stats_csv_path}: {e}") + # ... (add previews for other data types like anndata, fasta, vcf, gff) ... 
+ else: + ui.label(f"Enhanced preview for {self.current_input_data_type.upper()} not fully shown here, but data object loaded.") + + def generate_and_save_pandas_summary_csv(self, dataframe: pd.DataFrame) -> Path | None: + + if not isinstance(dataframe, pd.DataFrame): return None + original_filename_stem = Path(self.current_dataset_display_name).stem if self.current_dataset_display_name and self.current_dataset_display_name != "No dataset loaded" else "dataset" + try: + summary_df = dataframe.describe(include='all') + summary_filename = f"summary_stats_for_{original_filename_stem}_{uuid.uuid4().hex[:6]}.csv" + # Save summary stats into the agent's generated files subdirectory + summary_csv_path = self.outputs_dir / AGENT_GENERATED_FILES_SUBDIR / summary_filename + summary_csv_path.parent.mkdir(parents=True, exist_ok=True) # Ensure directory exists + summary_df.to_csv(summary_csv_path, index=True) + logging.info(f"Pandas summary saved: {summary_csv_path}") + return summary_csv_path + except Exception as e: + logging.error(f"Error generating/saving pandas summary: {e}", exc_info=True) + ui.notify(f"Error in summary stats generation: {e}", type='negative') + return None + + def update_sidebar_bookmarks(self): + if not self.bookmarks_container: return + self.bookmarks_container.clear() + with self.bookmarks_container: + if not self.bookmarks: + ui.label("No bookmarks yet.").classes("text-xs text-gray-500 p-2 text-center") + else: + for idx, bookmark in enumerate(self.bookmarks): + user_q = bookmark.get("user_question", "Bookmarked Item") + assistant_resp = bookmark.get("assistant_response", {}) + assistant_content_snippet = str(assistant_resp.get("content", ""))[:70] + "..." 
+ with ui.card().tight().classes("w-full my-1 shadow-md hover:shadow-lg transition-shadow cursor-pointer"): + with ui.card_section().classes("p-2"): + with ui.row().classes("w-full items-center justify-between no-wrap"): + with ui.column().classes("flex-grow").on('click', lambda b=bookmark: self.show_bookmark_details(b)): + ui.label(f"Q: {user_q[:50]}...").classes("text-xs font-semibold text-indigo-700").style("white-space: normal; word-break: break-word; line-height: 1.2;") + ui.label(f"A: {assistant_content_snippet}").classes("text-xs text-gray-600 mt-1").style("white-space: normal; word-break: break-word; line-height: 1.2;") + ui.button(icon='delete_sweep', on_click=lambda i=idx: self.delete_bookmark(i), color='red-5') \ + .props('flat round dense size=xs').tooltip("Delete bookmark") + + + def delete_bookmark(self, bookmark_idx: int): + if 0 <= bookmark_idx < len(self.bookmarks): + deleted_bookmark = self.bookmarks.pop(bookmark_idx) + # Unmark original message + deleted_timestamp = deleted_bookmark.get("assistant_response", {}).get("timestamp") + if deleted_timestamp: + for msg in self.messages: + if msg.get("role") == "assistant" and msg.get("timestamp") == deleted_timestamp: + msg.pop('bookmarked', None) + break + ui.notify("Bookmark removed.", type='info') + self.save_chat_history() + self.update_sidebar_bookmarks() + self.update_chat_display() + if self.selected_bookmark_for_details and self.selected_bookmark_for_details.get("assistant_response", {}).get("timestamp") == deleted_timestamp: + self.selected_bookmark_for_details = None + self.update_details_pane() + else: + ui.notify("Could not delete bookmark (invalid index).", type='negative') + + + def show_details_for_message(self, message_idx: int): + self.selected_message_for_details_idx = message_idx + self.selected_bookmark_for_details = None + if self.details_container: + self.update_details_pane() + else: + logging.warning("Details container not initialized for message details.") + + def 
show_bookmark_details(self, bookmark_data: dict): + self.selected_bookmark_for_details = bookmark_data + self.selected_message_for_details_idx = None + if self.details_container: + self.update_details_pane() + else: + logging.warning("Details container not initialized for bookmark details.") + + def add_bookmark(self, message_idx: int): + if 0 <= message_idx < len(self.messages) and self.messages[message_idx].get("role") == "assistant": + assistant_msg = self.messages[message_idx] + if assistant_msg.get('bookmarked'): + ui.notify("This response is already bookmarked.", type='info'); return + + user_question = "Context not found" + for i in range(message_idx - 1, -1, -1): + if self.messages[i].get("role") == "user": + user_question = self.messages[i].get("content", "User query not found"); break + + bookmark_data = { + "user_question": user_question, + "assistant_response": { + "content": assistant_msg.get("content"), + "plots": assistant_msg.get("plots", []), + "files": assistant_msg.get("files", []), + "middle_steps": assistant_msg.get("middle_steps"), # Important for DSPy + "timestamp": assistant_msg.get("timestamp") + } + } + self.bookmarks.append(bookmark_data) + self.messages[message_idx]['bookmarked'] = True + ui.notify("Response bookmarked!", type='positive') + self.save_chat_history() + self.update_sidebar_bookmarks() + self.update_chat_display() + else: + ui.notify("Could not bookmark this message.", type='negative') + + def update_details_pane(self): + if not self.details_container: + logging.warning("NiceGuiApp: Details container NA for update_details_pane.") + return + self.details_container.clear() + + source_data = None + data_origin = None # 'live_message', 'bookmark', or 'live_message_default' + + # --- Logic to determine source_data and data_origin --- + if self.selected_bookmark_for_details: + source_data = self.selected_bookmark_for_details + data_origin = 'bookmark' + elif self.selected_message_for_details_idx is not None and \ + 
self.selected_message_for_details_idx < len(self.messages): + source_data = self.messages[self.selected_message_for_details_idx] + data_origin = 'live_message' + elif self.selected_message_for_details_idx is None: # Default to last message with details + for i in range(len(self.messages) - 1, -1, -1): + msg = self.messages[i] + if msg.get("role") == "assistant" and \ + (msg.get("plots") or msg.get("files") or \ + (msg.get("middle_steps") and not str(msg.get("middle_steps")).startswith("*No"))): + source_data = msg + data_origin = 'live_message_default' + break + + with self.details_container: + if not source_data: + ui.label("Select 'View Details' or a bookmark for details.").classes("text-gray-500 m-4 italic text-center") + return + + # --- Initialize variables to hold content from source_data --- + plots_raw = [] + files_raw = [] + middle_steps_content = None + user_query_for_display = "N/A" + assistant_content_for_display = "N/A" # For the main explanation + + if data_origin == 'live_message' or data_origin == 'live_message_default': + msg_data = source_data + ui.markdown("##### Agent Response Details") + # Find associated user query + current_idx = self.messages.index(msg_data) if msg_data in self.messages else -1 + if current_idx > 0: + for idx_q in range(current_idx - 1, -1, -1): + if self.messages[idx_q].get("role") == "user": + user_query_for_display = self.messages[idx_q].get("content", "N/A") + break + + assistant_content_for_display = msg_data.get("content", "No explanation provided.") + plots_raw = msg_data.get("plots", []) + files_raw = msg_data.get("files", []) + middle_steps_content = msg_data.get("middle_steps") + + elif data_origin == 'bookmark': + ui.markdown("##### Bookmarked Item Details") + user_query_for_display = source_data.get("user_question", "N/A") + assistant_resp = source_data.get("assistant_response", {}) + assistant_content_for_display = assistant_resp.get("content", "No explanation provided.") + plots_raw = 
assistant_resp.get("plots", []) + files_raw = assistant_resp.get("files", []) + middle_steps_content = assistant_resp.get("middle_steps") + + # --- Display User Query and Assistant's Main Explanation --- + if user_query_for_display != "N/A": + ui.markdown("###### Regarding Query:").classes("text-gray-700 font-semibold mt-1 text-sm") + ui.markdown(f"{user_query_for_display[:250]}{'...' if len(user_query_for_display)>250 else ''}").classes("text-gray-800 p-2 text-sm bg-slate-100 rounded-md border") + + # Display main explanation from assistant (if not already part of middle steps or other detailed views) + # This assumes 'content' field holds the primary textual explanation. + # if assistant_content_for_display != "N/A": + # ui.markdown("###### Agent's Explanation:").classes("text-gray-700 font-semibold mt-2 text-sm") + # ui.markdown(str(assistant_content_for_display)).classes("text-sm link-styling p-1") # Added p-1 for slight padding + + ui.separator().classes("my-3") + + # --- Display Middle Steps --- + formatted_middle_steps_str = self.format_raw_middle_steps_for_display(middle_steps_content) + if formatted_middle_steps_str and not formatted_middle_steps_str.startswith("*No"): + with ui.expansion("Agent's Workings", icon="list_alt", value=True).classes("w-full my-2 border rounded shadow-sm"): + with ui.card_section().classes("bg-gray-50 p-2"): # Ensure card_section for proper styling + ui.markdown(formatted_middle_steps_str).classes('middle-steps-content') # Ensure .middle-steps-content CSS is defined + elif middle_steps_content: + ui.markdown("###### Agent Workings (Raw/Fallback):").classes("mt-2 text-sm") + ui.markdown(f"```text\n{str(middle_steps_content)[:1000]}\n```").classes("text-xs text-gray-600 bg-slate-50 p-2 border rounded") + + # --- Display Plots --- + plots_to_display = [self.outputs_dir / p for p in plots_raw if p and isinstance(p, (str, Path)) and (self.outputs_dir / p).is_file()] + if plots_to_display: + ui.markdown("###### Plots").classes("mt-3 
text-base font-medium text-gray-700") # Enhanced heading + with ui.grid(columns=1).classes("gap-3 w-full"): # slightly more gap + for p_path in plots_to_display: + with ui.card().tight().classes("w-full shadow-md rounded-lg overflow-hidden"): # Added rounded-lg and overflow-hidden + try: + ui.image(str(p_path)).classes('max-w-full h-auto object-contain border-b') # border-b for separation + with ui.card_actions().props("align=right").classes("bg-slate-50 px-2 py-1"): # Actions with slight background + ui.button(icon="download", on_click=lambda current_path=str(p_path): ui.download(current_path, filename=Path(current_path).name)) \ + .props("flat dense size=sm color=primary round").tooltip("Download Plot") + except Exception as e_img: + logging.error(f"Error displaying image {p_path}: {e_img}") + with ui.card_section().classes("p-2"): + ui.label(f"Could not display plot: {p_path.name}").classes('text-red-500 text-xs') + + # --- Display Files (with corrected CSV table display) --- + # files_to_display = [self.outputs_dir / f for f in files_raw if f and isinstance(f, (str, Path)) and (self.outputs_dir / f).is_file()] + # if files_to_display: + # ui.markdown("###### Files & Data").classes("mt-3 text-base font-medium text-gray-700") # Enhanced heading + # for f_path in files_to_display: + # with ui.card().tight().classes("my-2 w-full shadow-md rounded-lg overflow-hidden"): # Added rounded-lg and overflow-hidden + # with ui.card_section().classes("flex justify-between items-center p-3 bg-slate-50 border-b"): # Slightly more padding and border + # ui.label(f_path.name).classes("font-semibold text-sm text-gray-800") + # ui.button(icon="download", on_click=lambda current_path=str(f_path): ui.download(current_path, filename=Path(current_path).name)) \ + # .props("flat dense size=sm color=primary round").tooltip("Download File") + + # if f_path.suffix.lower() in ['.csv', '.tsv']: + # # This div will handle the horizontal scrolling for the table + # with 
ui.element('div').classes('w-full overflow-auto'): # overflow-auto handles both x and y if needed + # try: + # df_prev = pd.read_csv(f_path, sep=',' if f_path.suffix.lower() == '.csv' else '\t') + # # The table itself is simple. The parent div provides scrolling. + # ui.table.from_pandas(df_prev.head(5)) \ + # .props('dense flat bordered separator=cell') \ + # .style('font-size: 0.75rem; min-width: 600px;') + # # min-width on table can encourage scrollbar if content is narrower than this. + # # Adjust 600px as needed or make it a percentage like '150%' if appropriate. + # except Exception as e_df_prev: + # logging.error(f"Error previewing table {f_path}: {e_df_prev}") + # with ui.card_section().classes("p-2"): # Add section for error message + # ui.label(f"Preview failed for {f_path.name}: {str(e_df_prev)[:100]}").classes('text-orange-500 text-xs') + + # elif f_path.suffix.lower() == '.html': + # with ui.card_section().classes("p-1 border-t"): # Added border-t + # try: + # with open(f_path, 'r', encoding='utf-8') as f_html_content: + # html_content_str = f_html_content.read() + # ui.html(html_content_str).classes('max-h-96 h-[350px] overflow-auto border w-full rounded') # Added rounded + # except Exception as e_html_display: + # logging.error(f"Error displaying HTML {f_path}: {e_html_display}") + # ui.label(f"HTML display failed for {f_path.name}: {e_html_display}").classes('text-orange-500 text-xs p-2') + # --- Display Files (with conditional preview for CSV/TSV) --- + files_to_display = [self.outputs_dir / f for f in files_raw if f and isinstance(f, (str, Path)) and (self.outputs_dir / f).is_file()] + if files_to_display: + ui.markdown("###### Files & Data").classes("mt-3 text-base font-medium text-gray-700") + for f_path in files_to_display: + # Check if the file is a CSV or TSV + if f_path.suffix.lower() in ['.csv', '.tsv']: + # --- Logic for CSV/TSV files (with preview) --- + with ui.card().tight().classes("my-2 w-full shadow-md rounded-lg overflow-hidden"): + 
# Header with filename and download button + with ui.card_section().classes("flex justify-between items-center p-3 bg-slate-50 border-b"): + ui.label(f_path.name).classes("font-semibold text-sm text-gray-800") + ui.button(icon="download", on_click=lambda current_path=str(f_path): ui.download(current_path, filename=Path(current_path).name)) \ + .props("flat dense size=sm color=primary round").tooltip("Download File") + + # Container for the table preview + with ui.element('div').classes('w-full overflow-auto'): + try: + df_prev = pd.read_csv(f_path, sep=',' if f_path.suffix.lower() == '.csv' else '\t') + # Display the preview table + ui.table.from_pandas(df_prev.head(5)) \ + .props('dense flat bordered separator=cell') \ + .style('font-size: 0.75rem; min-width: 600px;') + except Exception as e_df_prev: + logging.error(f"Error previewing table {f_path}: {e_df_prev}") + with ui.card_section().classes("p-2"): + ui.label(f"Preview failed for {f_path.name}").classes('text-orange-500 text-xs') + else: + # --- Logic for ALL OTHER file types (download only) --- + with ui.card().tight().classes("my-2 w-full shadow-md rounded-lg"): + with ui.card_section().classes("flex justify-between items-center p-3 bg-slate-50"): + ui.label(f_path.name).classes("font-semibold text-sm text-gray-800") + ui.button(icon="download", on_click=lambda current_path=str(f_path): ui.download(current_path, filename=Path(current_path).name)) \ + .props("flat dense size=sm color=primary round").tooltip("Download File") + if source_data.get("cost") is not None: + ui.markdown(f"**API cost for this query:** ${source_data['cost']:.4f}") \ + .classes("mt-3 text-sm text-gray-600") + + # --- Fallback message if no content to display --- + if not plots_to_display and not files_to_display and \ + not (formatted_middle_steps_str and not formatted_middle_steps_str.startswith("*No")): + ui.label("No specific plots, files, or detailed agent workings for this message.").classes("m-4 text-gray-500 italic 
text-center") + + def update_chat_display(self): + if not self.chat_container: + logging.warning("Chat container not available for update_chat_display.") + return + + self.chat_container.clear() + with self.chat_container: + for i, msg_data in enumerate(self.messages): + role, is_user = msg_data.get("role", "assistant"), msg_data.get("role") == "user" + name = self.user_id if is_user else "Agent" + # avatar_char = "name[0].upper() if name else ('U' if is_user else 'A')" + + chat_message_props = ( + f"text-color=black bg-color={'blue-1' if is_user else 'grey-2'} " + f"name-color={'indigo-8' if is_user else 'deep-purple-8'}" + ) + + with ui.chat_message( + name=name, + sent=is_user, + avatar='/static/user.png' if is_user else '/static/agent.png' + ).props(chat_message_props).classes('w-full rounded-lg shadow-sm'): + + with ui.column().classes('w-full no-wrap pa-0 ma-0'): + original_content_value = msg_data.get("content", "") + msg_type = msg_data.get("type", "text") # type includes "text_with_attachments", "text_with_candidates" + + final_content_for_markdown: str + if isinstance(original_content_value, list): + final_content_for_markdown = "\\n".join(map(str, original_content_value)) # Use \\n for markdown newlines + else: + final_content_for_markdown = str(original_content_value) + + ui.markdown(final_content_for_markdown).classes('text-sm link-styling') + + # Custom client-side timestamp rendering + raw_timestamp_str = msg_data.get("timestamp") + if raw_timestamp_str and isinstance(raw_timestamp_str, str) and raw_timestamp_str.strip(): + timestamp_dom_id = f"custom_ts_element_{uuid.uuid4().hex[:8]}" + ui.html(f'
') + + js_code_to_format_stamp = f""" + (function() {{ + var el = document.getElementById('{timestamp_dom_id}'); + var utcTimestampStr = '{raw_timestamp_str}'; + if (el) {{ + try {{ + var date = new Date(utcTimestampStr); + if (!isNaN(date.getTime())) {{ + el.textContent = date.toLocaleTimeString(undefined, {{ hour: 'numeric', minute: '2-digit', hour12: true }}); + }} else {{ el.textContent = ''; }} + }} catch (e) {{ el.textContent = ''; }} + }} + }})(); + """ + ui.timer(0.15, lambda code=js_code_to_format_stamp: ui.run_javascript(code), once=True) + + # Buttons for assistant messages + if role == "assistant": + with ui.row().classes("items-center -ml-1 mt-1 gap-x-1"): # Action buttons row + has_details_content = ( + bool(msg_data.get("plots")) or + bool(msg_data.get("files")) or + (msg_data.get("middle_steps") and not str(msg_data.get("middle_steps")).startswith("*No")) + ) + if has_details_content: + ui.button("View Details", icon="table_chart", on_click=lambda bound_idx=i: self.show_details_for_message(bound_idx))\ + .props('flat color=teal rounded size=sm').classes('text-xs px-2 py-0.5') + + if not msg_data.get('bookmarked'): + ui.button(icon="bookmark_add", on_click=lambda msg_idx=i: self.add_bookmark(msg_idx))\ + .props("flat dense round color=amber-8 size=sm").tooltip("Bookmark this response") + else: + ui.icon("bookmark_added", color="amber-8 size-5").classes("ml-1 cursor-default").tooltip("Bookmarked") + + # Display candidate solutions if present (example structure) + if msg_type == "text_with_candidates" and msg_data.get("candidates"): + for cand_idx, cand in enumerate(msg_data.get("candidates", [])): + with ui.expansion(f"Candidate {cand_idx+1}: {cand.get('option', 'Option')}", icon='ballot').classes('w-full my-1 text-xs shadow-sm rounded-md border'): + ui.markdown(f"**Expl:** {cand.get('explanation', '')[:150]}...").classes("p-1") + if self.chat_input_field: + ui.button("Use this", icon='check_circle_outline', + on_click=lambda c=cand, 
ci=self.chat_input_field: ( + ci.set_value(f"Regarding candidate '{c.get('option','')}': {c.get('explanation','')}... Please proceed."), + ci.run_method('focus')) + ).props(f'flat dense size=xs key="refine_{i}_{cand_idx}"').classes("m-1") + + # Display next steps suggestions + if msg_data.get("next_steps"): + with ui.row().classes("mt-2 gap-1 flex-wrap items-center"): + ui.markdown("**Next:**").classes("self-center text-xs mr-1 text-gray-700") + for step_idx, step in enumerate(msg_data["next_steps"][:3]): # Show up to 3 + if self.chat_input_field: + ui.button(step, + on_click=lambda s=step, ci=self.chat_input_field: (ci.set_value(s), ci.run_method('focus'))) \ + .props(f'flat dense no-caps key="next_step_{i}_{step_idx}"') \ + .classes('text-sm bg-indigo-50 hover:bg-indigo-100 text-indigo-700 rounded-full px-3 py-1') + + # Scroll to bottom logic + def scroll_chat_to_bottom_js(): + if self.chat_container and self.chat_container.client.has_socket_connection: + chat_id = self.chat_container.id + js_command = f"var el = getElement({chat_id}); if (el) {{ el.scrollTop = el.scrollHeight; }}" + ui.run_javascript(js_command) + ui.timer(0.1, scroll_chat_to_bottom_js, once=True) + + def _handle_drawer_escape_key(self, e): + try: + key_obj = getattr(e, 'key', None) + action_obj = getattr(e, 'action', None) + is_escape = False + if key_obj: + if hasattr(key_obj, 'escape') and key_obj.escape is True: is_escape = True + elif hasattr(key_obj, 'name') and isinstance(key_obj.name, str) and key_obj.name.lower() == 'escape': is_escape = True + is_keydown = False + if action_obj and hasattr(action_obj, 'keydown') and action_obj.keydown is True: is_keydown = True + if is_escape and is_keydown: + if self.left_drawer and self.left_drawer.value: self.left_drawer.value = False + except AttributeError: pass + + +# --- CLI Argument Parsing & App Run --- +if __name__ in {"__main__", "__mp_main__"}: + parser = argparse.ArgumentParser(description="Galaxy Chat Analysis with DSPy and NiceGUI") + 
parser.add_argument("--user_id", nargs='?', default=f"user_{uuid.uuid4().hex[:6]}", help="User ID (defaults to a random ID).") + parser.add_argument("--openai_key_file", dest="cli_openai_key_file_path", help="Path to OpenAI API key file.") + parser.add_argument("--groq_key_file", dest="cli_groq_key_file_path", help="Path to Groq API key file.") + parser.add_argument("--chat_history", dest="chat_history_path", default=str(DEFAULT_CHAT_HISTORY_FILE), help="Path to chat history JSON file.") + parser.add_argument("--outputs_dir", dest="generate_file_path", default=str(DEFAULT_outputs_dir), help="Directory for generated files (plots, data).") + parser.add_argument("--input_file", dest="input_file_path", help="Path to an initial dataset file to load.") + parser.add_argument("--input_type", dest="input_data_type", default="csv", help="Type of the initial dataset file (e.g., csv, tsv, h5ad).") + parser.add_argument("--dspy_examples", dest="dspy_examples_path", default=str(DEFAULT_DSPY_EXAMPLES_FILE), help="Path to DSPy training examples JSON file.") + parser.add_argument("--compile_dspy", dest="compile_dspy_agent", action=argparse.BooleanOptionalAction, default=True, help="Enable/disable DSPy agent compilation on startup.") + + + cli_args = parser.parse_args() + + parsed_outputs_dir = Path(cli_args.generate_file_path) + + # Ensure output directory exists + Path(cli_args.generate_file_path).mkdir(parents=True, exist_ok=True) + (Path(cli_args.generate_file_path) / AGENT_GENERATED_FILES_SUBDIR).mkdir(parents=True, exist_ok=True) + + # dspy_cache_path = parsed_outputs_dir / ".dspy_cache" + # dspy_cache_path.mkdir(parents=True, exist_ok=True) + # os.environ["DSPY_CACHE_DIR"] = str(dspy_cache_path) + # logging.info(f"DSPY_CACHE_DIR explicitly set to: {dspy_cache_path.resolve()}") + + # # Configure Matplotlib cache directory + # matplotlib_cache_path = parsed_outputs_dir / ".matplotlib_cache" + # matplotlib_cache_path.mkdir(parents=True, exist_ok=True) + # 
os.environ["MPLCONFIGDIR"] = str(matplotlib_cache_path) + # logging.info(f"MPLCONFIGDIR explicitly set to: {matplotlib_cache_path.resolve()}") + + app_instance = NiceGuiApp(user_id=cli_args.user_id, cli_args_ns=cli_args) + + @ui.page('/') + def main_page_entry(client: Client): # client arg is passed by NiceGUI + app_instance.build_ui() + + app.add_static_files('/static', SCRIPT_PATH / 'static') + + ui.run(title="Galaxy Chat Analysis (DSPy)", storage_secret=str(uuid.uuid4()), + port=9090, reload=True, # Check env var for reload + uvicorn_logging_level='info', + favicon=SCRIPT_PATH / "favicon.ico",) \ No newline at end of file diff --git a/dspy_agent/requirements_nicegui_dspy.txt b/dspy_agent/requirements_nicegui_dspy.txt new file mode 100644 index 0000000..ff65894 --- /dev/null +++ b/dspy_agent/requirements_nicegui_dspy.txt @@ -0,0 +1,34 @@ +# Core Application & Frameworks +nicegui +dspy-ai==2.4.3 +python-dotenv +cloudpickle +psycopg2-binary + +# Core Data Science & Plotting +pandas +numpy>=1.24.0 +scipy +matplotlib +seaborn +Pillow +plotly +openpyxl +pyarrow +tables +statsmodels + +# Machine Learning & Modeling +scikit-learn +joblib +autogluon +xgboost +lightgbm +catboost + +# Bioinformatics +anndata +biopython +PyVCF +pysam +gffutils \ No newline at end of file diff --git a/favicon.ico b/dspy_agent/static/favicon.ico similarity index 100% rename from favicon.ico rename to dspy_agent/static/favicon.ico diff --git a/chat_analysis.py b/pandasai_agent/chat_analysis.py similarity index 99% rename from chat_analysis.py rename to pandasai_agent/chat_analysis.py index 5da98d7..bf7334a 100644 --- a/chat_analysis.py +++ b/pandasai_agent/chat_analysis.py @@ -7,7 +7,7 @@ from pandasai import Agent from pandasai.llm import OpenAI, BambooLLM, AzureOpenAI from pandasai.responses.streamlit_response import StreamlitResponse -import config +import pandasai_agent.config as config from helper import detect_image_path from pandasai.exceptions import PandasAIApiCallError import sys 
diff --git a/config.py b/pandasai_agent/config.py similarity index 100% rename from config.py rename to pandasai_agent/config.py diff --git a/requirements_old.txt b/pandasai_agent/requirements_pandas.txt similarity index 100% rename from requirements_old.txt rename to pandasai_agent/requirements_pandas.txt diff --git a/Chat.py b/smolagents_agent/Chat.py similarity index 87% rename from Chat.py rename to smolagents_agent/Chat.py index 9709e9d..8748214 100644 --- a/Chat.py +++ b/smolagents_agent/Chat.py @@ -473,11 +473,7 @@ def preview_dataset(self, file): current_data_type = self.input_data_type pandas_compatible_types = ['csv', 'tsv', 'xlsx', 'xls', 'json', 'parquet', 'h5', 'bed'] if current_data_type in pandas_compatible_types and isinstance(data, pd.DataFrame): - if not st.session_state.get("summary_stats_csv_path", None): - generated_summary_path = self.generate_and_save_pandas_summary_csv(data) - st.session_state["summary_stats_csv_path"] = generated_summary_path - else: - generated_summary_path = st.session_state.get("summary_stats_csv_path", None) + generated_summary_path = self.generate_and_save_pandas_summary_csv(data) if generated_summary_path: self.summary_stats_csv_path = generated_summary_path # Store path @@ -518,15 +514,9 @@ def save_chat_history(self): "analysis_file_path": st.session_state.get("analysis_file_path", ""), "input_data_type": st.session_state.get("input_data_type", ""), "bookmarks": st.session_state.get("bookmarks", []), - "summary_stats_csv_path": st.session_state.get("summary_stats_csv_path", ""), } with open(self.chat_hisory_file, "w") as f: json.dump(history, f, indent=2) - bookmark_history = { - "bookmarks": st.session_state.get("bookmarks", []), - } - with open("bookmarks.json", "w") as f: - json.dump(bookmark_history, f, indent=2) def load_chat_history(self): @@ -546,14 +536,60 @@ def load_chat_history(self): st.session_state["analysis_file_path"] = history.get("analysis_file_path", "") st.session_state["input_data_type"] = 
history.get("input_data_type", "") st.session_state["bookmarks"] = history.get("bookmarks", []) - st.session_state["summary_stats_csv_path"] = history.get("summary_stats_csv_path", "") else: # File is empty; initialize session state with defaults. st.session_state["messages"] = [] st.session_state["eda_report"] = "" st.session_state["memory"] = deque(maxlen=15) st.session_state["bookmarks"] = [] - st.session_state["summary_stats_csv_path"] = "" + + + def display_bookmark_manager(self): + st.title( "Bookmark Manager") + bookmarks = st.session_state.get("bookmarks", []) + if not bookmarks: + st.info("No bookmarks saved.") + return + + for i, b in enumerate(bookmarks): + if not b: + continue + rawq = b.get("question", "Unknown question") + rawa = b.get("answer", "No answer saved") + + question = rawq if rawq else "Unknown question" + answer = rawa if rawa else "No answer saved" + with st.expander(f"Bookmark {i + 1}: {question[:60]}"): + st.markdown(f"**Question:** {question}") + st.markdown(f"**Answer:**\n{answer}") + + if b.get("plots"): + st.markdown("**Saved Plots:**") + for path in b["plots"]: + if os.path.exists(path): + st.image(path, caption=os.path.basename(path)) + + if b.get("files"): + st.markdown("**Saved Files:**") + for path in b["files"]: + if os.path.exists(path): + with open(path, "rb") as f: + st.download_button( + label=f"Download {os.path.basename(path)}", + data=f, + file_name=os.path.basename(path), + key=f"bm_dl_{i}_{path}" + ) + + # if st.button("πŸ” Rerun this query", key=f"rerun_bookmark_{i}"): + # st.session_state["prefilled_input"] = b["question"] + # + + # if st.button("πŸ—‘οΈ Delete", key=f"delete_bookmark_{i}"): + # st.session_state["bookmarks"].pop(i) + # self.save_chat_history() + # st.success("Bookmark deleted.") + # st.experimental_rerun() def load_dataset_preview(self): @@ -624,8 +660,113 @@ def display_middle_steps(self, steps_list): elif seg_type == "code": st.code(seg_text) - # def display_chat_history(self): - # messages = 
st.session_state.get("messages", []) + def display_response(self, explanation, plot_paths, file_paths, next_steps_suggestion, middle_steps="", candidate_solutions=None): + with st.chat_message("assistant"): + # Clean explanation and next steps text. + explanation = clean_text(explanation) + next_steps_suggestion = clean_text(next_steps_suggestion) + + msg_idx = len(st.session_state["messages"]) - 1 + + # If candidate solutions are provided, display them separately. + if candidate_solutions is not None: + st.markdown("### Candidate Solutions") + for idx, candidate in enumerate(candidate_solutions, start=1): + with st.expander(f"Candidate {idx}: {candidate.get('option', 'Option')}"): + st.markdown(f"**Explanation:** {candidate.get('explanation', '')}") + st.markdown(f"**Pros:** {candidate.get('pros', '')}") + st.markdown(f"**Cons:** {candidate.get('cons', '')}") + # A button to allow the user to refine this candidate solution. + if st.button("Refine this solution", key=f"refine_candidate_{msg_idx}_{idx}"): + # Pre-fill input with candidate details for refinement. + st.session_state["prefilled_input"] = candidate.get("option", "") + " " + candidate.get("explanation", "") + else: + # Display the explanation text normally. + if "count" in explanation and "mean" in explanation and "std" in explanation: + st.code(explanation) + else: + st.markdown(explanation) + + # Display intermediate steps if available. + if middle_steps: + # self.display_middle_steps(middle_steps) + with st.expander("View Intermediate Steps"): + st.markdown(middle_steps) + + # Display any generated plots. + for plot_path in plot_paths: + if plot_path and os.path.exists(plot_path): + image = Image.open(plot_path) + file_name = os.path.basename(plot_path) + file_name_no_ext = os.path.splitext(file_name)[0] + st.image(image, caption=file_name_no_ext) + + # Display file download buttons for any generated files. 
+ for file_path in file_paths: + if file_path and os.path.exists(file_path): + + if file_path.lower().endswith(".csv"): + try: + df = pd.read_csv(file_path) + st.markdown(f"Preview of **{os.path.basename(file_path)}**:") + st.dataframe(df) + except Exception as e: + print(f"Error reading CSV file {os.path.basename(file_path)}: {e}") + if file_path.lower().endswith(".tsv"): + try: + df = pd.read_csv(file_path, sep="\t") + st.markdown(f"Preview of **{os.path.basename(file_path)}**:") + st.dataframe(df) + except Exception as e: + print(f"Error reading CSV file {os.path.basename(file_path)}: {e}") + + unique_key = str(uuid.uuid4()) + with open(file_path, "rb") as f: + st.download_button( + label=f"Download {os.path.basename(file_path)}", + data=f, + file_name=os.path.basename(file_path), + key=f"download_{unique_key}" + ) + + bookmark_data = { + "question": st.session_state["messages"][-2]["content"] if len(st.session_state["messages"]) > 1 else "Unknown", + "answer": explanation, + "plots": plot_paths, + "files": file_paths, + } + + if st.button("πŸ”– Bookmark this response", key=f"bookmark_{msg_idx}"): + st.session_state["bookmarks"].append(bookmark_data) + st.session_state["messages"][msg_idx]["bookmarked"] = True + self.save_chat_history() + st.rerun() + st.success("Response bookmarked!") + + + if st.session_state.get("db_available", False): + if not st.session_state.get(f"feedback_submitted_{msg_idx}", False): + col1, col2 = st.columns(2) + # The on_click callback immediately stores the feedback. + col1.button("πŸ‘", key=f"thumbs_up_{msg_idx}", on_click=self.submit_feedback_response, args=("Yes", msg_idx)) + col2.button("πŸ‘Ž", key=f"thumbs_down_{msg_idx}", on_click=self.submit_feedback_response, args=("No", msg_idx)) + else: + st.info("Feedback recorded!") + # Allow the user to add or update an optional comment. 
+ comment = st.text_area("Optional comment:", key=f"feedback_comment_{msg_idx}") + if st.button("Update Comment", key=f"update_comment_{msg_idx}"): + feedback_id = st.session_state.get(f"feedback_id_{msg_idx}") + update_feedback_comment(feedback_id, comment) + st.success("Comment updated!") + + if not candidate_solutions and next_steps_suggestion: + suggestions = [s.strip() for s in next_steps_suggestion.split("\n") if s.strip()] + self.display_suggestion_buttons(suggestions) + st.markdown("Please let me know if you want to proceed with any of the suggestions or ask any other questions.") + + + def display_chat_history(self): + messages = st.session_state.get("messages", []) # for idx, message in enumerate(messages): # if not message or not message.get("role") or not message.get("content"): @@ -929,11 +1070,10 @@ def get_agent_prompt(self, dataset_path, user_question, question_type: int=2): f"- You should find an appropriate method to generate plots for this query. If a plot or file is generated, save it in the directory {self.output_dir} with a random numerical suffix to prevent overwrites.\n" "- Do not generate filenames like 'random_forest_model_XXXX.joblib'.\n" "- Always consider to generate plots or files to support your answer.\n" - f"- If plots are generated, if possible, put the data used to generate the plots in csv files in the {self.output_dir} directory.\n" "- Always call the final_answer tool, providing the final answer in the following dictionary format (do not format as a JSON code block):\n" '{ "explanation": ["Your explanation here, in plain text. 
This can include detailed information or step-by-step guidance."], ' - '"plots": ["" (leave the list empty if no plots are needed)], ' - '"files": ["" (leave the list empty if no files are needed)], ' + '"plots": ["" (leave empty if no plots are needed)], ' + '"files": ["" (leave empty if no files are needed)], ' '"next_steps_suggestion": ["List of possible next questions the user could ask to gain further insights. They should be questions. Only include this when the user has not explicitly asked for suggestions."] }' ) elif question_type == 1: @@ -947,8 +1087,8 @@ def get_agent_prompt(self, dataset_path, user_question, question_type: int=2): "- Always consider to generate plots or files to support your answer.\n" "- Always call the final_answer tool, providing the final answer in the following dictionary format (do not format as a JSON code block):\n" '{ "explanation": ["Your explanation here, in plain text. This can include detailed information or step-by-step guidance."], ' - '"plots": ["" (leave the list empty if no plots are needed)], ' - '"files": ["" (leave the list empty if no files are needed)], ' + '"plots": ["" (leave empty if no plots are needed)], ' + '"files": ["" (leave empty if no files are needed)], ' '"next_steps_suggestion": ["List of possible next questions the user could ask to gain further insights. They should be questions. 
Only include this when the user has not explicitly asked for suggestions."] }' ) else: @@ -967,10 +1107,9 @@ def get_agent_prompt(self, dataset_path, user_question, question_type: int=2): f"- If a plot or file is generated, save it in the {self.output_dir} directory with a random numerical suffix to prevent overwrites.\n" "- Do not generate filenames like 'random_forest_model_XXXX.joblib'.\n" "- Always consider to generate plots or files to support your answer.\n" - f"- If plots are generated, if possible, put the data used to generate the plots in csv files in the {self.output_dir} directory.\n" "- Always call the final_answer tool, providing the final answer in one of the following dictionary formats (do not format as a JSON code block):\n\n" "Simple answer format:\n" - '{ "explanation": ["Your explanation text. in plain text. This can include detailed information or step-by-step guidance."], "plots": ["" (leave the list empty if no plots are needed)], "files": ["" (leave the list empty if no files are needed)], "next_steps_suggestion": ["Suggestion 1", "Suggestion 2"] }\n\n' + '{ "explanation": ["Your explanation text. in plain text. This can include detailed information or step-by-step guidance."], "plots": [""], "files": [""], "next_steps_suggestion": ["Suggestion 1", "Suggestion 2"] }\n\n' "Multiple candidate solutions format:\n" '{ "candidate_solutions": [ { "option": "Solution 1", "explanation": "Detailed explanation...", "pros": "Pros...", "cons": "Cons..." }, { "option": "Solution 2", "explanation": "Detailed explanation...", "pros": "Pros...", "cons": "Cons..." }, { "option": "Solution 3", "explanation": "Detailed explanation...", "pros": "Pros...", "cons": "Cons..." 
} ], "next_steps_suggestion": ["Which option would you like to refine?", "Or ask for more details on a candidate solution."] }' ) @@ -1048,7 +1187,7 @@ def run_eda(self, temp_file_path): # ) file_paths = parsed.get("files", []) - file_paths = [eda_file_path] + file_paths if file_paths else [eda_file_path] + file_paths = [eda_file_path] + file_paths eda_result_message = { "role": "assistant", @@ -1061,8 +1200,14 @@ def run_eda(self, temp_file_path): st.session_state["messages"].append(eda_result_message) st.session_state["memory"].append(f"Assistant (EDA): {report_text}") + self.display_response( + explanation=report_text, + plot_paths=parsed.get("plots", []) if parsed else [], + file_paths=file_paths, + next_steps_suggestion=" \n* ".join(parsed.get("next_steps_suggestion", [])) if parsed else "", + middle_steps=middle_steps + ) self.save_chat_history() - st.rerun() except Exception as e: st.error(f"Error during EDA: {e}") @@ -1114,6 +1259,15 @@ def process_response(self, response, middle_steps=""): "middle_steps": middle_steps }) st.session_state["memory"].append(f"{role.capitalize()}: Multiple candidate solutions generated.") + # Display candidate solutions + self.display_response( + explanation="Multiple candidate solutions generated.", + plot_paths=[], + file_paths=[], + next_steps_suggestion=next_steps, + middle_steps=middle_steps, + candidate_solutions=candidate_list + ) else: message = { @@ -1134,6 +1288,13 @@ def process_response(self, response, middle_steps=""): "middle_steps": message["middle_steps"] }) st.session_state["memory"].append(f"{role.capitalize()}: {message['explanation']}") + self.display_response( + message["explanation"], + message["plots"], + message["files"], + message["next_steps_suggestion"], + message["middle_steps"] + ) else: st.session_state["messages"].append({ @@ -1156,6 +1317,14 @@ def process_response(self, response, middle_steps=""): "middle_steps": middle_steps }) st.session_state["memory"].append("Assistant: Multiple candidate 
solutions generated.") + self.display_response( + explanation="", + plot_paths=[], + file_paths=[], + next_steps_suggestion=next_steps, + middle_steps=middle_steps, + candidate_solutions=candidate_list + ) else: message = { @@ -1177,6 +1346,13 @@ def process_response(self, response, middle_steps=""): "middle_steps": message["middle_steps"] }) st.session_state["memory"].append("Assistant: " + message["explanation"]) + self.display_response( + message["explanation"], + message["plots"], + message["files"], + message["next_steps_suggestion"], + message["middle_steps"] + ) # Case 3: Response is a plain string. @@ -1196,8 +1372,6 @@ def process_response(self, response, middle_steps=""): "role": "assistant", "content": f"Response received:\n\n{response}\n" }) - self.save_chat_history() - st.rerun() @@ -1290,8 +1464,8 @@ def run(self): if os.path.exists(st.session_state["analysis_file_path"]): if st.sidebar.button("Correlation Matrix", key="corr_matrix"): self.handle_user_input(st.session_state["analysis_file_path"], "Show the correlation matrix of the features.") - if st.sidebar.button("Identify missing values & drop sparse columns", key="missing_values"): - self.handle_user_input(st.session_state["analysis_file_path"], "Are there any missing values in the dataset? If so, which columns contain them? If applicable, remove the columns with mostly missing values and return the modified dataset. 
Only return the dataset if it was modified.") + if st.sidebar.button("Missing Values", key="missing_values"): + self.handle_user_input(st.session_state["analysis_file_path"], "What are the missing values in the dataset?") if st.sidebar.button("Numerical Feature Distribution", key="num_dist"): self.handle_user_input(st.session_state["analysis_file_path"], "Show the distribution of numerical features.") # if st.sidebar.button("Summary Statistics", key="summary_stats"): diff --git a/smolagents_agent/chat_nicegui.py b/smolagents_agent/chat_nicegui.py new file mode 100644 index 0000000..59e5224 --- /dev/null +++ b/smolagents_agent/chat_nicegui.py @@ -0,0 +1,1557 @@ +import os +import re +import base64 +import pandas as pd +from collections import deque +from dotenv import load_dotenv +import json +import uuid +import logging +import sys +from pathlib import Path +import psycopg2 +import asyncio +import argparse # Ensure this is at the top + +import yaml +import importlib.resources + +# NiceGUI imports +from nicegui import ui, app, Client +from nicegui.events import UploadEventArguments +from nicegui import app +from functools import lru_cache + +from smolagents_agent.prompt import CODE_AGENT_SYSTEM_PROMPT + +# Smolagents imports +try: + from smolagents import CodeAgent, LiteLLMModel +except ImportError: + logging.error("smolagents or LiteLLMModel not found. 
Ensure it's installed and in PYTHONPATH.") + CodeAgent = object + LiteLLMModel = object + +# --- Global Constants and Configuration --- +OPENAI_API_KEY_FILE = Path("user_config_openai.key") +GROQ_API_KEY_FILE = Path("user_config_groq.key") +DEFAULT_OUTPUT_DIR = Path("outputs_dir") +DEFAULT_CHAT_HISTORY_FILE = Path("chat_history_nicegui.json") +SCRIPT_PATH = Path(__file__).resolve().parent + +load_dotenv() +logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') + +def get_custom_prompt_templates() -> dict: # Or PromptTemplates if imported + try: + # Load the default prompt templates YAML from the smolagents library + default_prompts_yaml = importlib.resources.files("smolagents.prompts").joinpath("code_agent.yaml").read_text() + custom_templates = yaml.safe_load(default_prompts_yaml) + except Exception as e: + logging.error(f"Could not load default smolagents prompts. Using a basic structure. Error: {e}") + # Fallback if default loading fails (ensure all required keys for PromptTemplates are present) + from smolagents.agents import EMPTY_PROMPT_TEMPLATES # Adjust path if needed + custom_templates = EMPTY_PROMPT_TEMPLATES.copy() # Make a copy + + # Replace the system_prompt with your custom one + custom_templates["system_prompt"] = CODE_AGENT_SYSTEM_PROMPT + + # Ensure other necessary keys from PromptTemplates are present if using EMPTY_PROMPT_TEMPLATES as fallback + # For example, if EMPTY_PROMPT_TEMPLATES doesn't have all nested dicts: + if "planning" not in custom_templates: + custom_templates["planning"] = {"initial_plan": "", "update_plan_pre_messages": "", "update_plan_post_messages": ""} + if "managed_agent" not in custom_templates: + custom_templates["managed_agent"] = {"task": "", "report": ""} + if "final_answer" not in custom_templates: + custom_templates["final_answer"] = {"pre_messages": "", "post_messages": ""} + + return custom_templates + +# Prepare it once +CUSTOM_PROMPT_TEMPLATES = get_custom_prompt_templates() + 
# --- Helper Functions ---
def save_key_to_specific_file(file_path: Path, key_value: str):
    """Persist an API key to ``file_path`` (parents created as needed).

    Failures are logged, never raised, so a broken key file cannot take the
    UI down.
    """
    try:
        file_path.parent.mkdir(parents=True, exist_ok=True)
        with open(file_path, "w") as f:
            f.write(key_value)
        logging.info(f"API key saved to {file_path}")
    except Exception as e:
        logging.error(f"Error saving API key to {file_path}: {e}", exc_info=True)


def load_key_from_specific_file(file_path: Path) -> str | None:
    """Return the stripped key stored in ``file_path``, or None if the file
    is missing, empty, or unreadable."""
    try:
        if file_path.exists():
            with open(file_path, "r") as f:
                key = f.read().strip()
            if key:
                logging.info(f"API key loaded from {file_path}")
                return key
    except Exception as e:
        logging.error(f"Error loading API key from {file_path}: {e}", exc_info=True)
    return None


def check_db_env_vars():
    """Return True when every PG_* env var needed for the feedback DB is set."""
    required_vars = ["PG_HOST_DA", "PG_DB_DA", "PG_USER_DA", "PG_PASSWORD_DA"]
    missing_vars = [var for var in required_vars if not os.environ.get(var)]
    if missing_vars:
        logging.warning(f"Missing DB env vars: {missing_vars}")
        return False
    return True


def get_db_connection():
    """Open a psycopg2 connection from env vars; return None on any failure."""
    if not check_db_env_vars():
        return None
    try:
        return psycopg2.connect(
            host=os.environ["PG_HOST_DA"], database=os.environ["PG_DB_DA"],
            user=os.environ["PG_USER_DA"], password=os.environ["PG_PASSWORD_DA"]
        )
    except Exception as e:
        logging.error(f"DB connection failed: {e}")
        return None


def init_feedback_db():
    """Create the ``message_feedback`` table if it does not exist.

    Returns True on success, False when env vars are missing, the connection
    fails, or the DDL errors out.
    """
    if not check_db_env_vars():
        logging.warning("PostgreSQL environment variables not fully set. Feedback DB will not be initialized.")
        return False
    conn = get_db_connection()
    if not conn:
        logging.error("Cannot init feedback DB: No connection.")
        return False
    try:
        with conn.cursor() as cur:
            cur.execute("""
                CREATE TABLE IF NOT EXISTS message_feedback (
                    id SERIAL PRIMARY KEY, user_id TEXT NOT NULL, question TEXT NOT NULL,
                    answer TEXT NOT NULL, feedback TEXT NOT NULL, comment TEXT,
                    dataset_path TEXT, timestamp TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP);
            """)
        conn.commit()
        logging.info("Feedback DB initialized.")
        return True
    except Exception as e:
        logging.error(f"Error initializing feedback DB table: {e}", exc_info=True)
        return False
    finally:
        if conn:
            conn.close()


@lru_cache(maxsize=5)
def create_agent_cached(api_key, model_id_with_prefix):
    """Build (and memoize per key/model pair) a smolagents CodeAgent."""
    if LiteLLMModel is object or CodeAgent is object:
        raise RuntimeError("LiteLLMModel or CodeAgent not available. Smolagents might not be installed or failed to import.")
    model = LiteLLMModel(model_id=model_id_with_prefix, api_key=api_key)
    return CodeAgent(
        tools=[], model=model,
        prompt_templates=CUSTOM_PROMPT_TEMPLATES,
        additional_authorized_imports=[
            "pandas", "numpy", "matplotlib", "seaborn", "scipy", "sklearn",
            "pycaret", "plotly", "joblib", "io", "xgboost", "lightgbm",
            "catboost", "anndata", "Bio", "pysam", "gffutils"
        ],
        max_steps=20)


class NiceGuiApp:
    """NiceGUI front-end for the dataset-analysis chat agent.

    Owns chat history, bookmarks, the loaded dataset, API keys, and all
    references to the UI widgets built elsewhere in this file.
    """

    # model id -> human-readable label shown in the sidebar select
    MODEL_OPTIONS_SELECT = {
        "gpt-4o": "OpenAI (GPT-4o)", "gpt-4o-mini": "OpenAI (GPT-4o-mini)",
        "gpt-4": "OpenAI (GPT-4)", "gpt-3.5-turbo": "OpenAI (GPT-3.5-Turbo)",
        "llama-3.3-70b-versatile": "Groq (Llama-3.3-70B)",
        "llama3-70b-8192": "Groq (Llama3-70B-8192)",
        "mixtral-8x7b-32768": "Groq (Mixtral-8x7B)",
    }

    def __init__(self, user_id: str, cli_args_ns: argparse.Namespace):
        self.user_id = user_id
        self.cli_args = cli_args_ns
        self.agent = None
        self.output_dir = Path(self.cli_args.generate_file_path)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        self.messages = []
        self.memory = deque(maxlen=30)  # rolling conversation context for the agent prompt
        self.bookmarks = []  # For bookmarking functionality

        self.current_dataset_file_path: Path | None = None
        self.current_dataset_display_name = "No dataset loaded"
        self.current_input_data_type = self.cli_args.input_data_type
        self.current_data_object = None
        self.summary_stats_csv_path: Path | None = None
        self.eda_report_path: Path | None = None
        self.db_available = False

        self.openai_api_key = ""
        self.groq_api_key = ""
        self.selected_model_id = "gpt-4o"
        self.selected_model_name = self.MODEL_OPTIONS_SELECT.get(self.selected_model_id, self.selected_model_id)

        self.selected_message_for_details_idx: int | None = None
        self.selected_bookmark_for_details: dict | None = None  # To distinguish bookmark details from live chat details

        # UI element references (populated when the page is built)
        self.chat_container: ui.column | None = None
        self.dataset_preview_area: ui.column | None = None
        self.sidebar_api_status_label: ui.label | None = None
        self.details_container: ui.column | None = None
        self.openai_key_input: ui.input | None = None
        self.groq_key_input: ui.input | None = None
        self.model_select_element: ui.select | None = None
        self.chat_input_field: ui.input | None = None
        self.left_drawer: ui.left_drawer | None = None
        self.bookmarks_container: ui.column | None = None  # For sidebar bookmarks

        self.chat_history_file_path = Path(self.cli_args.chat_history_path)
        self.initial_dataset_path_from_arg: Path | None = Path(self.cli_args.input_file_path) if self.cli_args.input_file_path else None

        self.load_initial_state()

    def load_initial_state(self):
        """Restore API keys, chat history, bookmarks, and dataset selection.

        CLI-supplied key files take precedence and are re-persisted to the
        well-known key files; a CLI-supplied dataset overrides the one saved
        in the chat-history JSON.
        """
        cli_openai_path_str = self.cli_args.cli_openai_key_file_path
        if cli_openai_path_str:
            cli_openai_key = load_key_from_specific_file(Path(cli_openai_path_str))
            if cli_openai_key:
                self.openai_api_key = cli_openai_key
                save_key_to_specific_file(OPENAI_API_KEY_FILE, cli_openai_key)
        if not self.openai_api_key:
            self.openai_api_key = load_key_from_specific_file(OPENAI_API_KEY_FILE) or ""

        cli_groq_path_str = self.cli_args.cli_groq_key_file_path
        if cli_groq_path_str:
            cli_groq_key = load_key_from_specific_file(Path(cli_groq_path_str))
            if cli_groq_key:
                self.groq_api_key = cli_groq_key
                save_key_to_specific_file(GROQ_API_KEY_FILE, cli_groq_key)
        if not self.groq_api_key:
            self.groq_api_key = load_key_from_specific_file(GROQ_API_KEY_FILE) or ""

        if self.chat_history_file_path.exists():
            try:
                with open(self.chat_history_file_path, "r") as f:
                    history = json.load(f)
                self.messages = history.get("messages", [])
                self.memory = deque(history.get("memory", []), maxlen=30)
                self.bookmarks = history.get("bookmarks", [])  # Load bookmarks

                saved_dataset_path_str = history.get("analysis_file_path")
                if saved_dataset_path_str:
                    saved_dataset_path = Path(saved_dataset_path_str)
                    if saved_dataset_path.exists():
                        self.current_dataset_file_path = saved_dataset_path
                        self.current_dataset_display_name = self.current_dataset_file_path.name
                        self.current_input_data_type = history.get("input_data_type", self.current_input_data_type)

                if self.initial_dataset_path_from_arg and self.initial_dataset_path_from_arg.exists():
                    self.current_dataset_file_path = self.initial_dataset_path_from_arg
                    self.current_dataset_display_name = self.current_dataset_file_path.name
                    self.current_input_data_type = self.cli_args.input_data_type  # Prioritize CLI type if file is from CLI

                summary_path_str = history.get("summary_stats_csv_path")
                eda_path_str = history.get("eda_report_path")
                if summary_path_str and Path(summary_path_str).exists():
                    self.summary_stats_csv_path = Path(summary_path_str)
                if eda_path_str and Path(eda_path_str).exists():
                    self.eda_report_path = Path(eda_path_str)

                # Re-flag messages whose responses were bookmarked (matched by timestamp).
                bookmarked_message_timestamps = {bm.get('assistant_response', {}).get('timestamp') for bm in self.bookmarks if bm.get('assistant_response')}
                for msg in self.messages:
                    if msg.get("role") == "assistant" and msg.get("timestamp") in bookmarked_message_timestamps:
                        msg['bookmarked'] = True

                logging.info(f"Chat history loaded from {self.chat_history_file_path}")
            except Exception as e:
                logging.error(f"Error loading chat history: {e}", exc_info=True)
        self.db_available = init_feedback_db()

    def save_chat_history(self):
        """Serialize messages, memory, bookmarks, and dataset paths to JSON."""
        history = {
            "messages": self.messages, "memory": list(self.memory),
            "bookmarks": self.bookmarks,  # Save bookmarks
            "analysis_file_path": str(self.current_dataset_file_path) if self.current_dataset_file_path else None,
            "input_data_type": self.current_input_data_type,
            "summary_stats_csv_path": str(self.summary_stats_csv_path) if self.summary_stats_csv_path else None,
            "eda_report_path": str(self.eda_report_path) if self.eda_report_path else None,
        }
        try:
            with open(self.chat_history_file_path, "w") as f:
                json.dump(history, f, indent=2)
            logging.info(f"Chat history (including bookmarks) saved to {self.chat_history_file_path}.")
        except Exception as e:
            logging.error(f"Error saving chat history: {e}", exc_info=True)

    def _get_final_api_key_and_model_id(self):
        """Pick the API key for the selected model and normalize the model id.

        Groq models get a ``groq/`` prefix as required by LiteLLM; unknown
        providers yield ``(None, selected_model_id)``.
        """
        is_openai = self.selected_model_id.startswith("gpt-")
        is_groq = any(kw in self.selected_model_id for kw in ["llama", "mixtral", "gemma"])
        final_api_key, final_model_id_for_agent = None, self.selected_model_id
        if is_openai:
            final_api_key = self.openai_api_key
        elif is_groq:
            final_api_key = self.groq_api_key
            if not final_model_id_for_agent.startswith("groq/"):
                final_model_id_for_agent = "groq/" + final_model_id_for_agent
        return final_api_key, final_model_id_for_agent

    def try_initialize_agent(self):
        """(Re)create ``self.agent`` for the current key/model and report status.

        Returns True when the agent is ready; all outcomes also update the
        sidebar status label and pop a toast.
        """
        final_api_key, final_model_id_for_agent = self._get_final_api_key_and_model_id()
        status_message, status_color = "", ""
        logging.info(f"Attempting to initialize agent with model: {final_model_id_for_agent}. API Key present: {bool(final_api_key)}")
        if final_api_key and final_model_id_for_agent:
            try:
                self.agent = create_agent_cached(final_api_key, final_model_id_for_agent)
                logging.info(f"system prompt: {self.agent.system_prompt}")
                status_message = f"Agent Ready ({self.selected_model_name})"
                status_color = 'green'
                ui.notify(status_message, type='positive', timeout=3000, position='top')
                logging.info(status_message)
            except Exception as e:
                self.agent = None
                error_str = str(e).lower()
                logging.error(f"Agent init failed for model '{final_model_id_for_agent}' using key '***{final_api_key[-4:] if final_api_key and len(final_api_key) > 4 else 'EMPTY/SHORT'}': {e}", exc_info=True)
                # Classify the failure without importing litellm at module scope:
                # fall back to substring matching when litellm isn't loaded.
                auth_keywords = ["authentication", "api key", "invalid key", "permission denied", "unauthorized", "401"]
                model_not_found_keywords = ["model_not_found", "does not exist", "404"]
                if any(keyword in error_str for keyword in auth_keywords) or isinstance(e, getattr(sys.modules.get("litellm.exceptions", object), "AuthenticationError", tuple())):
                    status_message = f"Agent Error: API Key for {self.selected_model_name} seems invalid or lacks permissions. Verify key. (Details in server console)"
                elif any(keyword in error_str for keyword in model_not_found_keywords) or isinstance(e, getattr(sys.modules.get("litellm.exceptions", object), "ModelNotFound", tuple())):
                    status_message = f"Agent Error: Model '{self.selected_model_name}' not found/accessible. Check model name & key. (Details in server console)"
                else:
                    status_message = f"Agent Error: Failed to initialize {self.selected_model_name}. Check server console."
                ui.notify(status_message + f" Error: {str(e)[:100]}...", type='negative', multi_line=True, classes='w-96 whitespace-pre-wrap', auto_close=False, position='center')
                status_color = 'red'
        else:
            self.agent = None
            missing_parts = []
            provider_name = "the selected provider"
            if self.selected_model_id:
                if self.selected_model_id.startswith("gpt-"):
                    provider_name = "OpenAI"
                elif any(kw in self.selected_model_id for kw in ["llama", "mixtral", "gemma"]):
                    provider_name = "Groq"
            if not final_api_key:
                missing_parts.append(f"API Key missing for {provider_name}")
            if not final_model_id_for_agent or final_model_id_for_agent == "groq/":
                missing_parts.append("a valid model is not selected")
            status_message = "Agent Not Ready: " + (" and ".join(missing_parts) if missing_parts else "Unknown configuration issue.") + ". Configure in sidebar."
            status_color = 'orange'
            ui.notify(status_message, type='warning', multi_line=True, classes='w-96 whitespace-pre-wrap', auto_close=False, position='center')
            logging.warning(status_message)
        if self.sidebar_api_status_label:
            self.sidebar_api_status_label.set_text(status_message)
            self.sidebar_api_status_label.style(f'color: {status_color}; font-weight: bold; font-size: 0.8rem;')
            self.sidebar_api_status_label.tooltip(status_message if len(status_message) > 40 else '')
        return self.agent is not None

    async def handle_upload(self, e: UploadEventArguments):
        """Persist an uploaded dataset into the output dir and refresh the UI."""
        if not e.content:
            ui.notify("No file content.", type='negative')
            return
        uploaded_filename = e.name
        # Infer the data type from the extension; default to csv when absent.
        self.current_input_data_type = Path(uploaded_filename).suffix.lower().replace('.', '') or 'csv'
        temp_file_path = self.output_dir / uploaded_filename
        try:
            with open(temp_file_path, 'wb') as f:
                f.write(e.content.read())
            self.current_dataset_file_path = temp_file_path
            self.current_dataset_display_name = uploaded_filename
            ui.notify(f"File '{uploaded_filename}' processed.", type='positive')
            # Invalidate artifacts derived from the previous dataset.
            self.summary_stats_csv_path = self.eda_report_path = None
            self.messages.append({
                "role": "system",
                "content": f"New dataset loaded: {uploaded_filename}. Please analyze.",
                "type": "text",
                "timestamp": pd.Timestamp.now(tz='UTC').isoformat()
            })
            self.update_chat_display()
            await self.preview_loaded_or_uploaded_dataset()
            self.save_chat_history()
        except Exception as ex:
            ui.notify(f"Error processing '{uploaded_filename}': {ex}", type='negative', multi_line=True)
            logging.error(f"Upload error for {uploaded_filename}: {ex}", exc_info=True)
            self.current_dataset_file_path = None
            self.current_dataset_display_name = "No dataset"

    def load_data_object_from_path(self, file_path: Path, data_type: str):
        """Load ``file_path`` as the object appropriate for ``data_type``.

        Returns a DataFrame, AnnData, SeqRecord list, pysam VariantFile, or
        gffutils DB depending on the type; None (with a toast) on failure.
        """
        try:
            if data_type == 'csv':
                return pd.read_csv(file_path)
            elif data_type == "tsv":
                return pd.read_csv(file_path, sep="\t")
            elif data_type == "h5ad":
                import anndata
                return anndata.read_h5ad(file_path)
            elif data_type in ("xlsx", "xls"):
                return pd.read_excel(file_path)
            elif data_type == "json":
                return pd.read_json(file_path)
            elif data_type == "parquet":
                return pd.read_parquet(file_path)
            elif data_type == "h5":
                return pd.read_hdf(file_path)
            elif data_type in ("fa", "fasta"):
                from Bio import SeqIO
                return list(SeqIO.parse(str(file_path), "fasta"))
            elif data_type == "vcf":
                import pysam
                return pysam.VariantFile(str(file_path))
            elif data_type in ("gtf", "gff"):
                import gffutils
                db_path = self.output_dir / f"{file_path.stem}.{data_type}.db"
                return gffutils.create_db(str(file_path), dbfn=str(db_path), force=True, keep_order=True, merge_strategy="merge", sort_attribute_values=True)
            elif data_type == "bed":
                return pd.read_csv(file_path, sep="\t", header=None)
            else:
                raise ValueError(f"Unsupported file type for direct load: {data_type}")
        except ImportError as ie:
            ui.notify(f"Missing library for {data_type}: {ie}. Install it.", type='error', multi_line=True, auto_close=False)
            logging.error(f"ImportError loading {file_path.name} ({data_type}): {ie}", exc_info=True)
            return None
        except Exception as e:
            ui.notify(f"Error loading {file_path.name} ({data_type}): {e}", type='negative', multi_line=True, auto_close=False)
            logging.error(f"Load error for {file_path} ({data_type}): {e}", exc_info=True)
            return None

    async def preview_loaded_or_uploaded_dataset(self):
        """Render a type-appropriate preview of the active dataset.

        DataFrames also get describe() summary statistics (generated and
        cached as a CSV on first view).
        """
        if not self.current_dataset_file_path or not self.dataset_preview_area:
            # BUGFIX: the original entered `with self.dataset_preview_area:`
            # even when the preview area was still None, which raised instead
            # of silently skipping the placeholder label.
            if self.dataset_preview_area:
                self.dataset_preview_area.clear()
                with self.dataset_preview_area:
                    ui.label("No dataset selected.")
            return
        self.dataset_preview_area.clear()
        with self.dataset_preview_area:
            ui.label(f"Active: {self.current_dataset_display_name} ({self.current_input_data_type.upper()})").classes('text-md font-semibold mb-1')
            self.current_data_object = self.load_data_object_from_path(self.current_dataset_file_path, self.current_input_data_type)
            if self.current_data_object is None:
                ui.label("Failed to load data for preview.").classes('text-red-500')
                return
            preview_table_height_classes = 'h-[200px] max-h-[200px] overflow-auto w-full bordered'
            summary_table_height_classes = 'h-[280px] max-h-[280px] overflow-auto w-full bordered'
            props_for_table = 'dense flat bordered separator=cell'
            if isinstance(self.current_data_object, pd.DataFrame):
                ui.markdown("###### Data Preview (Top 5)")
                ui.table.from_pandas(self.current_data_object.head(5)).classes(preview_table_height_classes).props(props_for_table)
                if not self.summary_stats_csv_path or not self.summary_stats_csv_path.exists():
                    self.summary_stats_csv_path = self.generate_and_save_pandas_summary_csv(self.current_data_object)
                if self.summary_stats_csv_path and self.summary_stats_csv_path.exists():
                    try:
                        summary_df = pd.read_csv(self.summary_stats_csv_path, index_col=0)
                        ui.markdown("###### Summary Statistics").classes('mt-2')
                        ui.table.from_pandas(summary_df).classes(summary_table_height_classes).props(props_for_table)
                        ui.button("Download Summary", icon="download", on_click=lambda: ui.download(str(self.summary_stats_csv_path), filename=self.summary_stats_csv_path.name)).props("dense size=sm flat").classes("mt-1 text-sm text-indigo-600 hover:text-indigo-800")
                    except Exception as e:
                        ui.notify(f"Err displaying summary: {e}", type='warning')
                        logging.warning(f"Summary CSV err {self.summary_stats_csv_path}: {e}")
            elif hasattr(self.current_data_object, "obs") and hasattr(self.current_data_object, "var"):  # AnnData
                ui.markdown("###### AnnData Obs (Top 5)")
                ui.table.from_pandas(self.current_data_object.obs.head(5)).classes(preview_table_height_classes).props(props_for_table)
                ui.markdown("###### AnnData Vars (Top 5)").classes('mt-2')
                ui.table.from_pandas(self.current_data_object.var.head(5)).classes(preview_table_height_classes).props(props_for_table)
            elif isinstance(self.current_data_object, list) and self.current_input_data_type in ("fa", "fasta"):  # FASTA
                ui.markdown("###### FASTA (First 3)")
                for i, record in enumerate(self.current_data_object[:3]):
                    ui.markdown(f"**ID:** `{record.id}`\n**Seq (60bp):** `{str(record.seq)[:60]}...`").classes("text-xs")
            elif hasattr(self.current_data_object, "header") and self.current_input_data_type == "vcf":  # VCF (pysam)
                ui.markdown("###### VCF Header")
                ui.code(str(self.current_data_object.header)).classes('max-h-40 overflow-auto text-xs')
                ui.markdown("###### VCF Records (First 3)").classes("mt-1")
                # NOTE(review): pysam's fetch() signature may not accept
                # max_records on all versions — confirm against the pysam API.
                recs = list(self.current_data_object.fetch(max_records=3))
                for rec in recs:
                    ui.code(str(rec)).classes("text-xs")
            elif hasattr(self.current_data_object, "all_features") and self.current_input_data_type in ("gtf", "gff"):  # gffutils DB
                ui.markdown("###### GTF/GFF Features (First 3)").classes("mt-1")
                features = list(self.current_data_object.all_features(limit=3))
                for feature in features:
                    ui.code(str(feature)).classes("text-xs")
            else:
                ui.label(f"Enhanced preview for {self.current_input_data_type.upper()} not fully shown here.")

    def generate_and_save_pandas_summary_csv(self, dataframe: pd.DataFrame) -> Path | None:
        """Write ``dataframe.describe(include='all')`` to a uniquely-named CSV
        in the output dir and return its path (None on failure)."""
        if not isinstance(dataframe, pd.DataFrame):
            return None
        original_filename_stem = Path(self.current_dataset_display_name).stem if self.current_dataset_display_name and self.current_dataset_display_name != "No dataset loaded" else "dataset"
        try:
            summary_df = dataframe.describe(include='all')
            summary_filename = f"summary_stats_for_{original_filename_stem}_{uuid.uuid4().hex[:6]}.csv"
            summary_csv_path = self.output_dir / summary_filename
            summary_df.to_csv(summary_csv_path, index=True)
            logging.info(f"Pandas summary saved: {summary_csv_path}")
            return summary_csv_path
        except Exception as e:
            logging.error(f"Error gen/save pandas summary: {e}", exc_info=True)
            ui.notify(f"Error in summary stats: {e}", type='negative')
            return None

    def get_agent_prompt_for_nicegui(self, user_question: str, question_type: int = 2):
        """Assemble the full agent prompt: memory context, dataset info, the
        task (EDA=0, summarize=1, general=2), and output-format requirements."""
        memory_history_str = "\n".join(self.memory)
        memory_context = f"PREVIOUS CONVERSATION HISTORY (for context only, do not repeat yourself):\n{memory_history_str}\nEND OF PREVIOUS CONVERSATION.\n\n" if memory_history_str else "This is the beginning of the conversation.\n\n"
        dataset_info = (
            f"The primary dataset is located at: '{self.current_dataset_file_path}'.\n"
            f"This dataset is of type: '{self.current_input_data_type}'. Use this type to determine how to read/load it. Ignore the file extension if it conflicts.\n"
            f"Any plots or data files you generate MUST be saved to this relative directory: '{self.output_dir}'.\n"
            "Ensure filenames are descriptive and unique (e.g., append a random suffix like `_1a2b3c.png`). Do NOT use generic placeholders like 'XXXX'.\n"
        )
        output_format_instructions = (
            " Always call the final_answer tool, providing the final answer in the following dictionary format (do not format as a JSON code block):\n"
            '{ "explanation": ["Your explanation here, in plain text. This can include detailed information or step-by-step guidance."], '
            '"plots": ["" (leave the list empty if no plots are needed)], '
            '"files": ["" (leave the list empty if no files are needed)], '
            '"next_steps_suggestion": ["List of possible next questions the user could ask to gain further insights. They should be questions. Only include this when the user has not explicitly asked for suggestions."] }'
        )
        if question_type == 0:
            current_task_prompt = (f"Perform a comprehensive Exploratory Data Analysis (EDA) on the provided dataset. Specifically address: {user_question}\n Always generate plots and supporting data files where appropriate for an EDA.")
        elif question_type == 1:
            current_task_prompt = f"Summarize the previous conversation provided in the history concisely. User's request: {user_question}"
        else:
            current_task_prompt = (f"Address the following user question: {user_question}\nBefore answering, critically analyze if the question is multifaceted, ambiguous, or covers several distinct aspects. If so, provide three distinct candidate solutions using the 'candidate_solutions' JSON format. Otherwise, if the question is straightforward, provide a single concise answer using the standard JSON format. Most questions should be straightforward.")
        final_prompt = (f"{memory_context}\n{dataset_info}\nCURRENT TASK: {current_task_prompt}\n\nOUTPUT REQUIREMENTS:\n{output_format_instructions}\nRemember to think step-by-step using the Thought, Code, Observation cycle before calling final_answer().")
        return final_prompt

    def parse_response_content_for_nicegui(self, content_str: str | dict):
        """Coerce an agent response into the expected answer dict.

        Dicts pass through unchanged; strings are scanned for a fenced JSON
        block, then a brace-delimited JSON object. Any failure yields a
        fallback dict whose explanation carries the raw content.
        """
        if isinstance(content_str, dict):
            return content_str
        if not isinstance(content_str, str):
            logging.warning(f"Agent response not string or dict: {type(content_str)}. Content: {str(content_str)[:100]}")
            return {"explanation": f"Unexpected response format: {str(content_str)[:100]}", "plots": [], "files": [], "next_steps_suggestion": []}
        try:
            match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content_str, re.DOTALL)
            json_str = match.group(1).strip() if match else content_str.strip()
            try:
                parsed = json.loads(json_str)
            except json.JSONDecodeError:
                # Second chance: isolate the outermost {...} span and retry.
                brace_start, brace_end = json_str.find('{'), json_str.rfind('}')
                if brace_start != -1 and brace_end != -1 and brace_end > brace_start:
                    potential_json = json_str[brace_start:brace_end + 1]
                    try:
                        parsed = json.loads(potential_json)
                    except json.JSONDecodeError as e_inner:
                        logging.error(f"Robust JSON decode error: {e_inner}. String part: {potential_json[:200]}...", exc_info=True)
                        raise e_inner
                else:
                    logging.error(f"No clear JSON object in string: {json_str[:200]}...")
                    raise json.JSONDecodeError("No JSON delimiters", json_str, 0)
            # Sanity-check the structure: accept either an explanation
            # (str/list) or a candidate_solutions list.
            if not isinstance(parsed.get("explanation") if "explanation" in parsed else parsed.get("candidate_solutions"), (str, list)):
                if not ("candidate_solutions" in parsed and isinstance(parsed["candidate_solutions"], list)):
                    logging.warning(f"Parsed JSON unexpected structure: {str(parsed)[:200]}")
                    return {"explanation": f"Agent response (parsed, structure differs): {str(parsed)}", "plots": [], "files": [], "next_steps_suggestion": []}
            return parsed
        except Exception as e:
            logging.error(f"Error parsing agent response: {e}. Content: {content_str[:500]}", exc_info=True)
            return {"explanation": f"Could not parse LLM response. Raw content (first 200 chars): {content_str[:200]}", "plots": [], "files": [], "next_steps_suggestion": []}

    def update_sidebar_bookmarks(self):
        """Re-render the sidebar bookmark cards from ``self.bookmarks``."""
        if not self.bookmarks_container:
            logging.warning("Bookmarks container not initialized, cannot update sidebar bookmarks.")
            return
        self.bookmarks_container.clear()
        with self.bookmarks_container:
            if not self.bookmarks:
                ui.label("No bookmarks yet.").classes("text-xs text-gray-500 p-2 text-center")
            else:
                for idx, bookmark in enumerate(self.bookmarks):
                    # Defaults guard against partially-populated bookmarks.
                    user_q = bookmark.get("user_question", "Bookmarked Item")
                    assistant_resp = bookmark.get("assistant_response", {})
                    assistant_content_snippet = str(assistant_resp.get("content", ""))[:70] + "..."
                    with ui.card().tight().classes("w-full my-1 shadow-md hover:shadow-lg transition-shadow cursor-pointer"):
                        with ui.card_section().classes("p-2"):
                            with ui.row().classes("w-full items-center justify-between no-wrap"):
                                # Clicking the text opens the bookmark in the details pane.
                                with ui.column().classes("flex-grow").on('click', lambda b=bookmark: self.show_bookmark_details(b)):
                                    ui.label(f"Q: {user_q[:50]}...").classes("text-xs font-semibold text-indigo-700").style("white-space: normal; word-break: break-word; line-height: 1.2;")
                                    ui.label(f"A: {assistant_content_snippet}").classes("text-xs text-gray-600 mt-1").style("white-space: normal; word-break: break-word; line-height: 1.2;")
                                ui.button(icon='delete_sweep', on_click=lambda i=idx: self.delete_bookmark(i), color='red-5') \
                                    .props('flat round dense size=xs').tooltip("Delete bookmark")

    def delete_bookmark(self, bookmark_idx: int):
        """Remove the bookmark at ``bookmark_idx`` and unmark its chat message.

        Matching back to the message uses content+timestamp, which is
        best-effort; unique message IDs would be more robust.
        """
        if 0 <= bookmark_idx < len(self.bookmarks):
            deleted_bookmark_content = self.bookmarks[bookmark_idx].get('assistant_response', {}).get('content')
            timestamp_of_deleted_bookmark = self.bookmarks[bookmark_idx].get('assistant_response', {}).get('timestamp')

            del self.bookmarks[bookmark_idx]

            for msg in self.messages:
                if msg.get("role") == "assistant" and \
                   msg.get("content") == deleted_bookmark_content and \
                   msg.get("timestamp") == timestamp_of_deleted_bookmark:
                    if 'bookmarked' in msg:
                        del msg['bookmarked']  # Remove bookmarked flag
                    break

            ui.notify("Bookmark removed.", type='info')
            self.save_chat_history()
            self.update_sidebar_bookmarks()  # Refresh the list in the sidebar
            self.update_chat_display()  # Refresh chat to update bookmark button states

            # If the deleted bookmark was being shown in details, clear details.
            if self.selected_bookmark_for_details and \
               self.selected_bookmark_for_details.get('assistant_response', {}).get('content') == deleted_bookmark_content and \
               self.selected_bookmark_for_details.get('assistant_response', {}).get('timestamp') == timestamp_of_deleted_bookmark:
                self.selected_bookmark_for_details = None
                self.update_details_pane()  # Show placeholder or default view in details
        else:
            ui.notify("Could not delete bookmark (invalid index).", type='negative')

    def show_details_for_message(self, message_idx: int):
        """Select a live chat message for the details pane (clears any bookmark selection)."""
        self.selected_message_for_details_idx = message_idx
        self.selected_bookmark_for_details = None
        if self.details_container:
            self.update_details_pane()
        else:
            logging.warning("Attempted to show message details, but details_container is not yet initialized.")

    def show_bookmark_details(self, bookmark_data: dict):
        """Select a bookmark for the details pane (clears any live-message selection)."""
        self.selected_bookmark_for_details = bookmark_data
        self.selected_message_for_details_idx = None
        if self.details_container:
            self.update_details_pane()
        else:
            logging.warning("Attempted to show bookmark details, but details_container is not yet initialized.")

    def add_bookmark(self, message_idx: int):
        """Bookmark the assistant message at ``message_idx`` together with the
        most recent preceding user question."""
        if 0 <= message_idx < len(self.messages) and self.messages[message_idx].get("role") == "assistant":
            assistant_msg = self.messages[message_idx]
            if assistant_msg.get('bookmarked'):
                ui.notify("This response is already bookmarked.", type='info')
                return

            user_question = "Context not found"
            for i in range(message_idx - 1, -1, -1):
                if self.messages[i].get("role") == "user":
                    user_question = self.messages[i].get("content", "User query not found")
                    break

            bookmark_data = {
                "user_question": user_question,
                "assistant_response": {
                    "content": assistant_msg.get("content"),
                    "plots": assistant_msg.get("plots", []),
                    "files": assistant_msg.get("files", []),
                    "middle_steps": assistant_msg.get("middle_steps"),
                    "timestamp": assistant_msg.get("timestamp")  # Crucial for unmarking later
                }
            }
            self.bookmarks.append(bookmark_data)
            self.messages[message_idx]['bookmarked'] = True
            ui.notify("Response bookmarked!", type='positive')
            self.save_chat_history()
            self.update_sidebar_bookmarks()
            self.update_chat_display()
        else:
            ui.notify("Could not bookmark this message.", type='negative')

    def format_raw_middle_steps_for_display(self, raw_steps: list | None) -> str:
        """Render the agent's intermediate steps as a Markdown transcript.

        Each step may contribute a thought/plan, tool action, executed code,
        observation (truncated at 700 chars), and error section.
        """
        if not raw_steps:
            return "*No intermediate steps were recorded by the agent.*"
        logging.info(f"Formatting {len(raw_steps)} middle steps.")
        formatted_steps_md_parts = ["#### Agent's Workings (Intermediate Steps)\n"]
        for idx, step_data in enumerate(raw_steps):
            step_number = step_data.get("step", idx + 1)
            current_step_md_parts = [f"##### Step {step_number}"]
            thought_content = step_data.get("thought", step_data.get("model_output", step_data.get("reasoning")))
            if thought_content:
                thought_str = str(thought_content).strip()
                task_marker_heading = "CURRENT TASK:"  # Remove if agent echoes it
                if idx == 0 and task_marker_heading in thought_str:
                    lines = thought_str.splitlines()
                    filtered_lines = [line for line in lines if not line.strip().startswith(task_marker_heading) and not line.strip().startswith("Address the following user question:")]
                    thought_str = "\n".join(filtered_lines).strip() if filtered_lines else "(Agent processed task and planned)"
                    if not thought_str.strip():
                        thought_str = "(Agent's initial planning step)"
                current_step_md_parts.append(f"**Thought/Plan:**\n```text\n{thought_str}\n```")
            action = step_data.get("action")
            action_input = step_data.get("action_input")
            if action:
                action_input_str = json.dumps(action_input, indent=2, default=str) if isinstance(action_input, dict) else str(action_input)
                current_step_md_parts.append(f"**Action Called:** `{action}`\n**Action Input:**\n```json\n{action_input_str}\n```")
            code_generated = step_data.get("code", step_data.get("code_generated"))
            if code_generated:
                current_step_md_parts.append(f"**Code Executed:**\n```python\n{str(code_generated).strip()}\n```")
            observation = step_data.get("observation", step_data.get("action_output", step_data.get("tool_outputs")))
            if observation is not None:
                obs_str = str(observation)
                obs_str = (obs_str[:700] + "\n... (observation truncated)") if len(obs_str) > 700 else obs_str
                current_step_md_parts.append(f"**Observation/Result:**\n```text\n{obs_str.strip()}\n```")
            error = step_data.get("error")
            if error:
                current_step_md_parts.append(f"**Error Encountered:**\n```text\n{str(error).strip()}\n```")
            if len(current_step_md_parts) > 1:
                formatted_steps_md_parts.append("\n\n".join(current_step_md_parts))
        return "\n\n---\n".join(formatted_steps_md_parts) if len(formatted_steps_md_parts) > 1 else "*No processable intermediate steps found.*"

    async def handle_user_input(self, user_question: str | None):
        # NOTE(review): this method continues beyond the reviewed chunk; only
        # the visible prefix is reproduced here, unchanged.
        if not user_question or not user_question.strip():
            ui.notify("Please enter a question.", type='warning')
            if self.chat_input_field:
                self.chat_input_field.set_value(None)
            return

        if self.chat_input_field:
            self.chat_input_field.set_value(None)
# ---- MODIFIED CHECK: Use self.current_data_object instead of self.df ---- + is_load_command = any(keyword in user_question.lower() for keyword in ["load", "upload", "dataset", "file", "open"]) + if self.current_data_object is None and not is_load_command: + ui.notify("Please upload or specify a dataset first, or ask to load one.", type='warning', position='center') + return + # ---- END MODIFIED CHECK ---- + + self.messages.append({ + "role": "user", + "content": user_question, + "type": "text", + "timestamp": pd.Timestamp.now(tz='UTC').isoformat() + }) + self.memory.append(f"User: {user_question}") + self.update_chat_display() + + spinner_row_to_delete = None + if self.chat_container: + with self.chat_container: + with ui.row().classes('w-full justify-center my-2') as temp_spinner_row: + ui.spinner(size='lg', color='primary') + spinner_row_to_delete = temp_spinner_row + + try: + if not self.agent: + ui.notify("Agent not initialized. Please check API keys and model selection in the sidebar.", type='error', position='center', auto_close=False) + if spinner_row_to_delete: + spinner_row_to_delete.delete() + spinner_row_to_delete = None + return + + prompt = self.get_agent_prompt_for_nicegui(user_question) + logging.debug(f"Agent prompt for '{user_question[:30]}...': ...{prompt[-300:]}") + + if hasattr(self.agent, 'memory') and hasattr(self.agent.memory, 'clear'): + self.agent.memory.clear() + logging.info("Agent's internal memory cleared before run.") + + response_content = await asyncio.to_thread(self.agent.run, prompt) + logging.info(f"Agent raw response: {str(response_content)[:500]}...") + + formatted_middle_steps = "*Agent did not provide detailed steps or steps could not be retrieved.*" + if hasattr(self.agent, 'memory') and hasattr(self.agent.memory, 'get_full_steps'): + raw_steps = self.agent.memory.get_full_steps() + logging.info(f"Raw middle steps (count: {len(raw_steps)}): {str(raw_steps)[:500]}...") + formatted_middle_steps = 
self.format_raw_middle_steps_for_display(raw_steps) + else: + logging.warning("Agent does not have 'memory.get_full_steps()' method to retrieve detailed steps.") + + parsed_response = self.parse_response_content_for_nicegui(response_content) + new_assistant_message_idx = -1 + if parsed_response: + msg_to_append = { + "role": "assistant", + "timestamp": pd.Timestamp.now(tz='UTC').isoformat(), + "middle_steps": formatted_middle_steps + } + # Using a more generic way to get primary text content from agent for memory + primary_agent_text = "Agent processed request." + if "explanation" in parsed_response: # This is for text_with_attachments + primary_agent_text = parsed_response["explanation"] + elif "final_answer" in parsed_response: # This might be for text_with_candidates or other structures + primary_agent_text = parsed_response["final_answer"] + elif isinstance(response_content, str): # Fallback for very simple responses + primary_agent_text = response_content + + if "candidate_solutions" in parsed_response: + msg_to_append.update({ + "type": "text_with_candidates", + # Use a clearer key for main content if candidates are present + "content": parsed_response.get("overview", "Please review the options below."), + "candidates": parsed_response.get("candidate_solutions", []), + "next_steps": parsed_response.get("next_steps_suggestion", []) + }) + else: + msg_to_append.update({ + "type": "text_with_attachments", + "content": parsed_response.get("explanation", "Agent processed request."), + "plots": parsed_response.get("plots", []), + "files": parsed_response.get("files", []), + "next_steps": parsed_response.get("next_steps_suggestion", []) + }) + self.messages.append(msg_to_append) + new_assistant_message_idx = len(self.messages) - 1 + self.memory.append(f"Agent: {str(primary_agent_text)[:200]}...") + + if new_assistant_message_idx != -1 and \ + (self.messages[new_assistant_message_idx].get("plots") or \ + self.messages[new_assistant_message_idx].get("files") or \ + 
(self.messages[new_assistant_message_idx].get("middle_steps") and \ + self.messages[new_assistant_message_idx]["middle_steps"].startswith("####"))): + self.show_details_for_message(new_assistant_message_idx) + else: + self.messages.append({ + "role": "assistant", + "content": f"Unprocessable response from agent. Raw (first 300 chars): {str(response_content)[:300]}...", + "type": "error", + "timestamp": pd.Timestamp.now(tz='UTC').isoformat(), + "middle_steps": formatted_middle_steps + }) + self.memory.append("Agent: [Unprocessable response]") + + except Exception as e: + logging.error(f"Error during agent interaction for '{user_question}': {e}", exc_info=True) + self.messages.append({ + "role": "assistant", + "content": f"An internal error occurred: {e}", + "type": "error", + "timestamp": pd.Timestamp.now(tz='UTC').isoformat(), + "middle_steps": "*Error during agent execution phase.*" + }) + finally: + if spinner_row_to_delete: + spinner_row_to_delete.delete() + # spinner_row_to_delete = None # Not strictly necessary but good form + + self.update_chat_display() + self.save_chat_history() + + # def update_details_pane(self): # Modified + # if not self.details_container: logging.warning("Details container NA for update."); return + # self.details_container.clear() + + # source_data = None + # data_origin = None # 'live_message' or 'bookmark' + + # if self.selected_bookmark_for_details: + # source_data = self.selected_bookmark_for_details + # data_origin = 'bookmark' + # elif self.selected_message_for_details_idx is not None and self.selected_message_for_details_idx < len(self.messages): + # source_data = self.messages[self.selected_message_for_details_idx] + # data_origin = 'live_message' + # elif self.selected_message_for_details_idx is None: # Default to last assistant message with details + # for i in range(len(self.messages) - 1, -1, -1): + # msg = self.messages[i] + # if msg.get("role") == "assistant" and (msg.get("plots") or msg.get("files") or 
(msg.get("middle_steps") and msg.get("middle_steps").startswith("####"))): + # source_data = msg; data_origin = 'live_message_default'; break + + # with self.details_container: + # if not source_data: + # ui.label("Select 'View Plots & Tables' or a bookmark for details.").classes("text-gray-500 m-4 text-center italic"); return + + # if data_origin == 'live_message' or data_origin == 'live_message_default': + # msg_data = source_data + # if msg_data.get("role") != "assistant": ui.label("Details only for assistant messages.").classes("text-gray-500 m-4"); return + # ui.markdown(f"##### Agent Response Details") + # user_query = "N/A"; current_idx = self.messages.index(msg_data) if msg_data in self.messages else -1 + # if current_idx > 0: + # idx = current_idx -1 + # while idx >= 0: + # if self.messages[idx].get("role") == "user": user_query = self.messages[idx].get("content", "N/A"); break + # idx -=1 + # if user_query != "N/A": + # # ui.markdown("###### Regarding Query:").classes("text-gray-700 font-semibold mt-1 text-sm") + # ui.markdown(f"Query: {user_query[:250]}{'...' 
if len(user_query)>250 else ''}").classes("text-gray-800 p-2 text-sm bg-slate-100 rounded-md border") + + # # Explanation is in main chat, plots, files, middle_steps are primary for details + # plots = msg_data.get("plots", []); files = msg_data.get("files", []); middle_steps_str = msg_data.get("middle_steps") + + # elif data_origin == 'bookmark': + # ui.markdown("##### Bookmarked Item Details") + # user_query = source_data.get("user_question", "N/A") + # assistant_resp = source_data.get("assistant_response", {}) + # # explanation = assistant_resp.get("content", "") # Not showing main explanation again + # plots = assistant_resp.get("plots", []); files = assistant_resp.get("files", []); middle_steps_str = assistant_resp.get("middle_steps") + # if user_query != "N/A": + # ui.markdown("###### User Query:").classes("text-gray-700 font-semibold mt-1 text-sm") + # ui.markdown(f"{user_query}").classes("text-gray-800 p-2 text-sm bg-slate-100 rounded-md border") + + # ui.separator().classes("my-3") + + # if middle_steps_str and middle_steps_str.strip() and middle_steps_str.startswith("####"): + # with ui.expansion("Agent's Workings (Intermediate Steps)", icon="list_alt", value=True).classes("w-full my-2 border rounded-md shadow-sm"): # Open by default + # with ui.card_section().classes("bg-gray-50 p-2"): + # ui.markdown(middle_steps_str).classes('middle-steps-content') # Uses CSS class + # elif middle_steps_str: + # ui.markdown("###### Agent's Workings:").classes("mt-2 text-sm") + # ui.markdown(middle_steps_str).classes("italic text-sm text-gray-500") + + # if plots: # ... 
(Plot rendering logic from previous response) + # ui.markdown("###### Plots").classes("mt-2 text-sm") + # with ui.grid(columns=1).classes("gap-2 w-full"): + # for plot_path_str in plots: + # plot_path = Path(plot_path_str) + # if plot_path.is_file(): + # with ui.card().tight().classes("w-full shadow-lg"): + # ui.image(str(plot_path)).classes('max-w-full h-auto rounded-t-lg border-b object-contain') + # with ui.card_actions().props("align=right"): ui.button(icon="download", on_click=lambda p=str(plot_path): ui.download(p)).props("flat dense size=sm color=primary round").tooltip("Download Plot") + # else: ui.label(f"Plot file not found: {plot_path.name}").classes('text-red-400 text-xs') + # if files: # ... (File rendering logic from previous response) + # ui.markdown("###### Files & Data").classes("mt-2 text-sm") + # for file_path_str in files: + # file_path = Path(file_path_str) + # if file_path.is_file(): + # with ui.card().tight().classes("my-2 w-full shadow-lg"): + # with ui.card_section().classes("flex justify-between items-center p-2"): + # ui.label(file_path.name).classes("font-semibold text-sm"); ui.button(icon="download", on_click=lambda p=str(file_path): ui.download(p)).props("flat dense size=sm color=primary round").tooltip("Download File") + # if file_path.suffix.lower() in ['.csv', '.tsv']: # This is likely your line 585 + # # IMPORTANT: The following lines MUST be indented + # ui.separator() + # with ui.card_section().classes("p-0"): + # try: + # df_preview = pd.read_csv(file_path, sep=',' if file_path.suffix.lower() == '.csv' else '\t') + # ui.table.from_pandas(df_preview.head(5)).classes('h-[200px] overflow-auto w-full bordered text-xs').props('dense flat bordered separator=cell') + # except Exception as e_df: + # ui.label(f"Preview failed: {e_df}").classes('text-orange-400 text-xs p-2') + + # elif file_path.suffix.lower() == '.html': # This is likely your line 586 + # # IMPORTANT: The following lines MUST be indented + # ui.separator() + # with 
ui.card_section(): + # try: + # with open(file_path, 'r', encoding='utf-8') as f_html: + # html_content = f_html.read() + # ui.html(html_content).classes('max-h-96 h-[350px] overflow-auto border w-full p-1') + # except Exception as e_html: + # ui.label(f"HTML display failed: {e_html}").classes('text-orange-400 text-xs p-2') + + # if not plots and not files and not (middle_steps_str and middle_steps_str.startswith("####")): + # ui.label("No specific plots, files, or detailed agent steps for this message.").classes("m-2 text-gray-500") + + # Inside your NiceGuiApp class: + + def update_details_pane(self): + if not self.details_container: + logging.warning("Details container NA for update.") + return + self.details_container.clear() + + source_data = None + data_origin = None + + # Logic to determine source_data and data_origin (from live message or bookmark) + # This part should remain as you have it. For example: + if self.selected_bookmark_for_details: + source_data = self.selected_bookmark_for_details + data_origin = 'bookmark' + elif self.selected_message_for_details_idx is not None and \ + self.selected_message_for_details_idx < len(self.messages): + source_data = self.messages[self.selected_message_for_details_idx] + data_origin = 'live_message' + elif self.selected_message_for_details_idx is None: # Default to last assistant message with details + for i in range(len(self.messages) - 1, -1, -1): + msg = self.messages[i] + if msg.get("role") == "assistant" and \ + (msg.get("plots") or msg.get("files") or \ + (msg.get("middle_steps") and msg.get("middle_steps").startswith("####"))): + source_data = msg + data_origin = 'live_message_default' + break + + with self.details_container: + if not source_data: + ui.label("Select 'View Plots & Tables' or a bookmark for details.")\ + .classes("text-gray-500 m-4 text-center italic") + return + + # Initialize raw lists + plots_raw = [] + files_raw = [] + middle_steps_str = None + user_query = "N/A" # Default user query + + # 
Extract data based on origin (live message or bookmark) + if data_origin == 'live_message' or data_origin == 'live_message_default': + msg_data = source_data # source_data is the message dict + # ... (code to display "Agent Response Details" and find user_query - keep this part) ... + if msg_data.get("role") != "assistant": # Should be redundant if logic above is correct + ui.label("Details only for assistant messages.").classes("text-gray-500 m-4"); return + ui.markdown(f"##### Agent Response Details") + current_idx = self.messages.index(msg_data) if msg_data in self.messages else -1 + if current_idx > 0: + idx = current_idx -1 + while idx >= 0: + if self.messages[idx].get("role") == "user": user_query = self.messages[idx].get("content", "N/A"); break + idx -=1 + if user_query != "N/A": + ui.markdown("###### Regarding Query:").classes("text-gray-700 font-semibold mt-1 text-sm") + ui.markdown(f"{user_query[:250]}{'...' if len(user_query)>250 else ''}").classes("text-gray-800 p-2 text-sm bg-slate-100 rounded-md border") + + plots_raw = msg_data.get("plots", []) + files_raw = msg_data.get("files", []) + middle_steps_str = msg_data.get("middle_steps") + + elif data_origin == 'bookmark': + ui.markdown("##### Bookmarked Item Details") + user_query = source_data.get("user_question", "N/A") + assistant_resp = source_data.get("assistant_response", {}) + if user_query != "N/A": + ui.markdown("###### User Query:").classes("text-gray-700 font-semibold mt-1 text-sm") + ui.markdown(f"{user_query}").classes("text-gray-800 p-2 text-sm bg-slate-100 rounded-md border") + + plots_raw = assistant_resp.get("plots", []) + files_raw = assistant_resp.get("files", []) + middle_steps_str = assistant_resp.get("middle_steps") + + ui.separator().classes("my-3") + + # Display Middle Steps (as you have it) + if middle_steps_str and middle_steps_str.strip() and middle_steps_str.startswith("####"): + with ui.expansion("Agent's Workings (Intermediate Steps)", icon="list_alt", value=True)\ + 
.classes("w-full my-2 border rounded-md shadow-sm"): + with ui.card_section().classes("bg-gray-50 p-2"): + ui.markdown(middle_steps_str).classes('middle-steps-content') + elif middle_steps_str: + ui.markdown("###### Agent's Workings:").classes("mt-2 text-sm") + ui.markdown(middle_steps_str).classes("italic text-sm text-gray-500") + + # --- FLATTEN AND DISPLAY PLOTS --- + plots_flattened = [] + if plots_raw: # Check if plots_raw is not empty + for item in plots_raw: + if isinstance(item, list): # If item is a list, iterate its sub-items + for sub_item in item: + if isinstance(sub_item, (str, Path)): # Ensure sub_item is a path-like string + plots_flattened.append(sub_item) + else: + logging.warning(f"Skipping invalid sub-item in plot path list: {sub_item} (type: {type(sub_item)})") + elif isinstance(item, (str, Path)): # If item is already a path-like string + plots_flattened.append(item) + else: + logging.warning(f"Skipping invalid item in plot path list: {item} (type: {type(item)})") + + if plots_flattened: + ui.markdown("###### Plots").classes("mt-2 text-sm") + with ui.grid(columns=1).classes("gap-2 w-full"): # Adjust columns if you want multiple plots per row + for plot_path_str in plots_flattened: + try: + plot_path = Path(plot_path_str) # This should now receive a string or Path object + if plot_path.is_file(): + with ui.card().tight().classes("w-full shadow-lg"): + ui.image(str(plot_path)).classes('max-w-full h-auto rounded-t-lg border-b object-contain') + with ui.card_actions().props("align=right"): + ui.button(icon="download", on_click=lambda p=str(plot_path): ui.download(p))\ + .props("flat dense size=sm color=primary round").tooltip("Download Plot") + else: + ui.label(f"Plot file not found: {plot_path.name}").classes('text-red-400 text-xs p-1') + except TypeError as te: + logging.error(f"TypeError when creating Path for plot: {plot_path_str} - {te}", exc_info=True) + ui.label(f"Invalid plot path: {str(plot_path_str)[:50]}...").classes('text-orange-500 
text-xs p-1') + except Exception as e_plot: + logging.error(f"Error rendering plot {plot_path_str}: {e_plot}", exc_info=True) + ui.label(f"Error for plot {str(plot_path_str)[:50]}...").classes('text-red-500 text-xs p-1') + + # --- FLATTEN AND DISPLAY FILES (similar logic) --- + files_flattened = [] + if files_raw: # Check if files_raw is not empty + for item in files_raw: + if isinstance(item, list): + for sub_item in item: + if isinstance(sub_item, (str, Path)): + files_flattened.append(sub_item) + else: + logging.warning(f"Skipping invalid sub-item in file path list: {sub_item} (type: {type(sub_item)})") + elif isinstance(item, (str, Path)): + files_flattened.append(item) + else: + logging.warning(f"Skipping invalid item in file path list: {item} (type: {type(item)})") + + if files_flattened: + ui.markdown("###### Files & Data").classes("mt-2 text-sm") + for file_path_str in files_flattened: + # ... (your existing logic for displaying files, using file_path_str) ... + # Ensure you use Path(file_path_str) within a try-except block here too. 
+ try: + file_path = Path(file_path_str) + if file_path.is_file(): + with ui.card().tight().classes("my-2 w-full shadow-lg"): + with ui.card_section().classes("flex justify-between items-center p-2"): + ui.label(file_path.name).classes("font-semibold text-sm") + ui.button(icon="download", on_click=lambda p=str(file_path): ui.download(p))\ + .props("flat dense size=sm color=primary round").tooltip("Download File") + if file_path.suffix.lower() in ['.csv', '.tsv']: + ui.separator() + with ui.card_section().classes("p-0"): + try: + df_preview = pd.read_csv(file_path, sep=',' if file_path.suffix.lower() == '.csv' else '\t') + ui.table.from_pandas(df_preview.head(3)).classes('h-[150px] overflow-auto w-full bordered text-xs').props('dense flat bordered separator=cell') + except Exception as e_df: + ui.label(f"Preview failed: {e_df}").classes('text-orange-400 text-xs p-2') + elif file_path.suffix.lower() == '.html': + ui.separator() + with ui.card_section().classes("p-1"): + try: + with open(file_path, 'r', encoding='utf-8') as f_html: + html_content = f_html.read() + ui.html(html_content).classes('max-h-96 h-[350px] overflow-auto border w-full') + except Exception as e_html: + ui.label(f"HTML display failed: {e_html}").classes('text-orange-400 text-xs p-2') + else: + ui.label(f"Data file not found: {file_path.name}").classes('text-red-400 text-xs p-1') + except TypeError as te: + logging.error(f"TypeError when creating Path for file: {file_path_str} - {te}", exc_info=True) + ui.label(f"Invalid file path: {str(file_path_str)[:50]}...").classes('text-orange-500 text-xs p-1') + except Exception as e_file: + logging.error(f"Error rendering file entry {file_path_str}: {e_file}", exc_info=True) + ui.label(f"Error for file {str(file_path_str)[:50]}...").classes('text-red-500 text-xs p-1') + + if not plots_flattened and not files_flattened and not (middle_steps_str and middle_steps_str.startswith("####")): + ui.label("No specific plots, files, or detailed agent steps for this 
message.")\ + .classes("m-2 text-gray-500") + + + # def update_chat_display(self): # Modified + # if not self.chat_container: return + # self.chat_container.clear() + # with self.chat_container: + # for i, msg_data in enumerate(self.messages): + # role, is_user = msg_data.get("role", "assistant"), msg_data.get("role") == "user" + # name = self.user_id if is_user else "Agent"; avatar_char = name[0].upper() if name else ('U' if is_user else 'A') + # timestamp_str = msg_data.get("timestamp", ""); stamp_display = "" + # if timestamp_str: + # try: stamp_display = pd.to_datetime(timestamp_str).strftime('%I:%M %p') + # except Exception: stamp_display = timestamp_str + + # chat_message_props = (f"text-color=black bg-color={'blue-1' if is_user else 'grey-2'} name-color={'indigo-8' if is_user else 'deep-purple-8'} stamp-color=grey-7") + + # with ui.chat_message(name=name, sent=is_user, stamp=stamp_display, avatar=f'https://robohash.org/{avatar_char}?set=set3&bgset=bg1' if is_user else f'https://robohash.org/{avatar_char}?set=set5&bgset=bg2').props(chat_message_props).classes('w-full rounded-lg shadow-sm'): + # original_content_value = msg_data.get("content", "") # Get the content as is + # msg_type = msg_data.get("type", "text") + + # # ---- MODIFICATION START ---- + # # Ensure content is a string before passing to ui.markdown + # final_content_for_markdown: str + # if isinstance(original_content_value, list): + # # If content is a list (e.g., from old chat history), join its elements. + # # map(str, ...) ensures all elements are strings before joining. + # final_content_for_markdown = "\n".join(map(str, original_content_value)) + # else: + # # If content is not a list, convert to string (it might already be a string). 
+ # final_content_for_markdown = str(original_content_value) + # # ---- MODIFICATION END ---- + + # ui.markdown(final_content_for_markdown).classes('text-sm link-styling') # Line 738 in new traceback + + # if role == "assistant": + # with ui.row().classes("items-center -ml-1 mt-1"): # Group buttons + # if msg_type == "text_with_attachments" and (bool(msg_data.get("plots")) or bool(msg_data.get("files")) or (msg_data.get("middle_steps") and msg_data.get("middle_steps").startswith("####"))): + # ui.button("View Plots & Tables", icon="table_chart", on_click=lambda bound_idx=i: self.show_details_for_message(bound_idx)).props('flat color=teal rounded size=sm').classes('text-xs px-2 py-0.5') # Changed icon and color + + # if not msg_data.get('bookmarked'): + # ui.button(icon="bookmark_add", on_click=lambda msg_idx=i: self.add_bookmark(msg_idx)).props("flat dense round color=amber-8 size=sm").tooltip("Bookmark this response") + # else: + # ui.icon("bookmark_added", color="amber-8 size-5").classes("ml-2 cursor-default").tooltip("Bookmarked") # size-5 for icon + + # if msg_type == "text_with_candidates": # THIS COULD BE YOUR LINE 637 + # # EVERYTHING INSIDE THIS 'if' BLOCK MUST BE INDENTED FURTHER + # for cand_idx, cand in enumerate(msg_data.get("candidates", [])): + # with ui.expansion(f"Candidate {cand_idx+1}: {cand.get('option', 'Option')}", icon='ballot').classes('w-full my-1 text-xs shadow-sm rounded-md border'): + # ui.markdown(f"**Expl:** {cand.get('explanation', '')[:150]}...").classes("p-1") + # if self.chat_input_field: + # ui.button("Use this", icon='check_circle_outline', + # on_click=lambda c=cand, ci=self.chat_input_field: ( + # ci.set_value(f"Regarding candidate '{c.get('option','')}': {c.get('explanation','')}... 
Please proceed."), + # ci.run_method('focus')) + # ).props(f'flat dense size=xs key="refine_{i}_{cand_idx}"').classes("m-1") + + # # The 'if' for next_steps should be at the same indentation level as + # # 'if msg_type == "text_with_candidates":' IF IT'S A SEPARATE, INDEPENDENT CHECK, + # # OR it could be an 'elif' or nested if its logic depends on the previous one. + # # In my last version, it was an independent check for next_steps. + # if msg_data.get("next_steps"): # THIS IS LIKELY YOUR LINE 638 + # # EVERYTHING INSIDE THIS 'if' BLOCK MUST BE INDENTED FURTHER + # with ui.row().classes("mt-2 gap-1 flex-wrap items-center"): # mt-2 for spacing + # ui.markdown("**Next:**").classes("self-center text-xs mr-1 text-gray-700") + # for step_idx, step in enumerate(msg_data["next_steps"][:3]): + # if self.chat_input_field: + # ui.button(step, + # on_click=lambda s=step, ci=self.chat_input_field: (ci.set_value(s), ci.run_method('focus'))) \ + # .props(f'flat dense no-caps key="next_step_{i}_{step_idx}"') \ + # .classes('text-sm bg-indigo-50 hover:bg-indigo-100 text-indigo-700 rounded-full px-3 py-1') + # def scroll_chat_to_bottom(): + # if self.chat_container and self.chat_container.client.has_socket_connection: + # chat_id = self.chat_container.id + # # Use ui.run_javascript to execute a script that finds the element by ID and scrolls it + # js_command = f"var el = getElement({chat_id}); if (el) {{ el.scrollTop = el.scrollHeight; }}" + # ui.run_javascript(js_command) + # logging.debug(f"Attempted to scroll chat container (ID: {chat_id}) to bottom via ui.run_javascript.") + # elif self.chat_container: + # logging.debug("Chat container exists but no socket connection for scroll.") + # else: + # logging.warning("Chat container does not exist when trying to schedule scroll.") + + # # Use a short timer to ensure the JS runs after client has processed UI updates + # # and new message elements are rendered, making scrollHeight accurate. 
+ # ui.timer(0.1, scroll_chat_to_bottom, once=True) # 0.1 second delay + + + + def update_chat_display(self): + if not self.chat_container: + logging.warning("Chat container not available for update_chat_display.") + return + + self.chat_container.clear() + with self.chat_container: + for i, msg_data in enumerate(self.messages): + role, is_user = msg_data.get("role", "assistant"), msg_data.get("role") == "user" + name = self.user_id if is_user else "Agent" + avatar_char = name[0].upper() if name else ('U' if is_user else 'A') + + chat_message_props = ( + f"text-color=black bg-color={'blue-1' if is_user else 'grey-2'} " + f"name-color={'indigo-8' if is_user else 'deep-purple-8'}" + # stamp-color prop on q-chat-message is for its own stamp, which we are bypassing. + ) + + # We will NOT use the 'stamp' prop of ui.chat_message. + # We will add our own ui.html element for the timestamp. + with ui.chat_message( + name=name, + sent=is_user, + avatar=f'https://robohash.org/{avatar_char}?set=set5&bgset=bg1' if is_user else f'https://robohash.org/{avatar_char}?set=set3&bgset=bg2' + ).props(chat_message_props).classes('w-full rounded-lg shadow-sm'): + + # This column will hold the main content and then our custom stamp below it. 
+ with ui.column().classes('w-full no-wrap pa-0 ma-0'): # Ensure no extra padding/margin from this column + original_content_value = msg_data.get("content", "") + msg_type = msg_data.get("type", "text") + final_content_for_markdown: str + if isinstance(original_content_value, list): + final_content_for_markdown = "\\n".join(map(str, original_content_value)) + else: + final_content_for_markdown = str(original_content_value) + + ui.markdown(final_content_for_markdown).classes('text-sm link-styling') + + # --- CUSTOM CLIENT-SIDE TIMESTAMP RENDERING --- + raw_timestamp_str = msg_data.get("timestamp") + utc_iso_for_js = None + + if raw_timestamp_str and isinstance(raw_timestamp_str, str) and raw_timestamp_str.strip(): + # Python pre-processing to ensure string is explicitly UTC for JS + try: + has_timezone_info = 'Z' in raw_timestamp_str.upper() or \ + ('+' in raw_timestamp_str[10:]) or \ + ('-' in raw_timestamp_str[10:] and raw_timestamp_str.count(':') >= 2) + if has_timezone_info: + dt_obj = pd.to_datetime(raw_timestamp_str) + utc_iso_for_js = dt_obj.tz_convert('UTC').isoformat() + else: + utc_iso_for_js = raw_timestamp_str + "Z" + except Exception as e_parse: + logging.warning(f"PYTHON_DEBUG: Could not fully normalize timestamp '{raw_timestamp_str}', using as-is or Z-appended for JS: {e_parse}") + if 'T' in raw_timestamp_str and not ('Z' in raw_timestamp_str.upper() or '+' in raw_timestamp_str[10:] or ('-' in raw_timestamp_str[10:] and raw_timestamp_str.count(':') >=2 )): + utc_iso_for_js = raw_timestamp_str + "Z" + else: + utc_iso_for_js = raw_timestamp_str # Pass potentially problematic string if normalization failed + + if utc_iso_for_js: + timestamp_dom_id = f"custom_ts_element_{uuid.uuid4().hex[:8]}" + + # This HTML element will contain our timestamp. + # We apply classes to make it look like a Quasar stamp. + # It's crucial this is styled and positioned correctly. + # It is now a sibling to the ui.markdown output, inside the ui.column. + ui.html(f'
') + + js_code_to_format_stamp = f""" + (function() {{ + var el = document.getElementById('{timestamp_dom_id}'); + var utcTimestampStr = '{utc_iso_for_js}'; + // console.log('[JS_DEBUG] Processing ID: {timestamp_dom_id}, UTC String: "' + utcTimestampStr + '"'); + if (el) {{ + try {{ + var date = new Date(utcTimestampStr); + if (isNaN(date.getTime())) {{ + el.textContent = ''; // Or '[time N/A]' + // console.warn('[JS_DEBUG] Invalid Date for ID {timestamp_dom_id} from:', utcTimestampStr); + }} else {{ + el.textContent = date.toLocaleTimeString(undefined, {{ + hour: 'numeric', + minute: '2-digit', + hour12: true + }}); + }} + }} catch (e) {{ + el.textContent = ''; // Or '[time error]' + // console.error('[JS_DEBUG] JS Exception for ID {timestamp_dom_id}:', e); + }} + }} else {{ + // console.warn('[JS_DEBUG] Timestamp DOM element NOT FOUND by ID:', '{timestamp_dom_id}'); + }} + }})(); + """ + ui.timer(0.15, lambda code=js_code_to_format_stamp: ui.run_javascript(code), once=True) + # --- END CUSTOM CLIENT-SIDE TIMESTAMP --- + + # ... (Your existing logic for assistant message buttons, candidates, next_steps) ... 
+ # (Ensure this logic is placed correctly relative to the ui.column holding markdown and stamp) + if role == "assistant": + with ui.row().classes("items-center -ml-1 mt-1"): # This row is a sibling to the ui.column above + if msg_type == "text_with_attachments" and \ + (bool(msg_data.get("plots")) or \ + bool(msg_data.get("files")) or \ + (msg_data.get("middle_steps") and msg_data.get("middle_steps").startswith("####"))): + ui.button("View Plots & Tables", icon="table_chart", on_click=lambda bound_idx=i: self.show_details_for_message(bound_idx))\ + .props('flat color=teal rounded size=sm').classes('text-xs px-2 py-0.5') + if not msg_data.get('bookmarked'): + ui.button(icon="bookmark_add", on_click=lambda msg_idx=i: self.add_bookmark(msg_idx))\ + .props("flat dense round color=amber-8 size=sm").tooltip("Bookmark this response") + else: + ui.icon("bookmark_added", color="amber-8 size-5").classes("ml-2 cursor-default").tooltip("Bookmarked") + if msg_type == "text_with_candidates": + for cand_idx, cand in enumerate(msg_data.get("candidates", [])): + with ui.expansion(f"Candidate {cand_idx+1}: {cand.get('option', 'Option')}", icon='ballot').classes('w-full my-1 text-xs shadow-sm rounded-md border'): + ui.markdown(f"**Expl:** {cand.get('explanation', '')[:150]}...").classes("p-1") + if self.chat_input_field: + ui.button("Use this", icon='check_circle_outline', + on_click=lambda c=cand, ci=self.chat_input_field: ( + ci.set_value(f"Regarding candidate '{c.get('option','')}': {c.get('explanation','')}... 
Please proceed."), + ci.run_method('focus')) + ).props(f'flat dense size=xs key="refine_{i}_{cand_idx}"').classes("m-1") + if msg_data.get("next_steps"): + with ui.row().classes("mt-2 gap-1 flex-wrap items-center"): + ui.markdown("**Next:**").classes("self-center text-xs mr-1 text-gray-700") + for step_idx, step in enumerate(msg_data["next_steps"][:3]): + if self.chat_input_field: + ui.button(step, + on_click=lambda s=step, ci=self.chat_input_field: (ci.set_value(s), ci.run_method('focus'))) \ + .props(f'flat dense no-caps key="next_step_{i}_{step_idx}"') \ + .classes('text-sm bg-indigo-50 hover:bg-indigo-100 text-indigo-700 rounded-full px-3 py-1') + + + # Scroll to bottom logic + def scroll_chat_to_bottom(): + if self.chat_container and self.chat_container.client.has_socket_connection: + chat_id = self.chat_container.id + js_command = f"var el = getElement({chat_id}); if (el) {{ el.scrollTop = el.scrollHeight; }}" + ui.run_javascript(js_command) + ui.timer(0.1, scroll_chat_to_bottom, once=True) + + # Place this method inside your NiceGuiApp class + def _handle_drawer_escape_key(self, e): # 'e' is the event argument from ui.keyboard + """Handles keyboard events to close the left drawer on Escape key press (on keydown).""" + try: + key_obj = getattr(e, 'key', None) + action_obj = getattr(e, 'action', None) + + is_escape = False + if key_obj: + # Prioritize boolean flag if available (e.g_ e.key.escape) + if hasattr(key_obj, 'escape') and key_obj.escape is True: + is_escape = True + # Fallback to checking the key name if the boolean flag isn't present/true + elif hasattr(key_obj, 'name') and isinstance(key_obj.name, str) and key_obj.name.lower() == 'escape': + is_escape = True + + is_keydown = False + if action_obj and hasattr(action_obj, 'keydown') and action_obj.keydown is True: + is_keydown = True + + if is_escape and is_keydown: # Process only on keydown of Escape + if self.left_drawer and self.left_drawer.value: # Check if drawer exists and is open + 
self.left_drawer.value = False + # import logging # Make sure logging is imported in your class/module + # logging.debug("Left drawer closed via Escape key.") # Optional logging + except AttributeError: + # Gracefully handle cases where 'e' or its attributes might not have the expected structure + # import logging + # logging.debug(f"Key event with unexpected structure for Esc handling: {e}") + pass + + # def build_ui(self): + # ui.add_head_html(""" + # + # """) + # self.left_drawer = ui.left_drawer(elevated=True, top_corner=True, bottom_corner=True).props('overlay breakpoint=lg').style('background-color: #f4f6f8;').classes('p-4 w-80 lg:w-96 border-r') + # with ui.header(elevated=True).style('background-color: #303f9f;').classes('items-center justify-between text-white q-px-md'): + # ui.label("Galaxy Chat Analysis").classes("text-xl md:text-2xl font-semibold tracking-wide") + # if self.left_drawer: ui.button(icon='menu', on_click=self.left_drawer.toggle).props('flat round color=white') + + # with self.left_drawer: + # ui.label("Configuration").classes("text-lg font-semibold mb-3 text-indigo-800") + # self.sidebar_api_status_label = ui.label("Agent: Unknown").classes("mb-3 text-xs p-1 rounded") + # self.model_select_element = ui.select(self.MODEL_OPTIONS_SELECT, label="LLM Model", value=self.selected_model_id, on_change=self.handle_model_change).props("outlined dense emit-value map-options").classes("w-full mb-3") + # with ui.expansion("API Keys", icon="key", value=False).classes("w-full mb-3 text-sm"): # ... API Key inputs ... 
+ # self.openai_key_input = ui.input(label="OpenAI API Key", password=True, value=self.openai_api_key, on_change=lambda e: setattr(self, 'openai_api_key', e.value)).props("dense outlined clearable") + # ui.button("Save OpenAI", on_click=self.save_openai_key, icon="save").classes("w-full mt-1").props("color=indigo-6 dense size=sm") + # self.groq_key_input = ui.input(label="Groq API Key", password=True, value=self.groq_api_key, on_change=lambda e: setattr(self, 'groq_api_key', e.value)).props("dense outlined clearable mt-2") + # ui.button("Save Groq", on_click=self.save_groq_key, icon="save").classes("w-full mt-1").props("color=indigo-6 dense size=sm") + + # ui.separator().classes("my-3") + # ui.label("Dataset").classes("text-md font-semibold mb-2 text-indigo-700") # ... Dataset upload ... + # ui.upload(label="Upload New Dataset", auto_upload=False, on_upload=self.handle_upload, max_file_size=200 * 1024 * 1024).props("accept=.csv,.tsv,.h5ad,.xlsx,.xls,.json,.parquet,.h5,.fa,.fasta,.vcf,.gtf,.gff,.bed").classes("w-full mb-3") + + # ui.label("Analysis Actions").classes("text-md font-semibold mt-3 mb-2 text-indigo-700") # ... EDA button ... 
+ # ui.button("Run Full EDA", on_click=self.run_eda_action, icon="query_stats").classes("w-full mb-1").props("color=deep-purple-6 dense") + # ui.separator().classes("my-3") + + # # Bookmarks Section + # with ui.expansion("⭐ Bookmarks", icon="bookmarks", value=True).classes("w-full text-sm"): + # self.bookmarks_container = ui.column().classes("w-full max-h-100 overflow-y-auto gap-1") # Scrollable bookmark list + # self.update_sidebar_bookmarks() # Initial population + + # with ui.splitter(value=60, reverse=False, limits=(30,70)).classes('w-full h-[calc(100vh-110px)] no-wrap') as main_splitter: + # with main_splitter.before: + # with ui.column().classes("w-full h-full p-0 items-stretch no-wrap items-stretch overflow-hidden"): + # self.chat_container = ui.column().classes("w-full flex-grow overflow-y-auto p-2 md:p-3 bg-gray-100") + # with ui.row().classes("w-full p-2 bg-slate-200 items-center border-t"): + # self.chat_input_field = ui.input(placeholder="Ask about the dataset...").props("bg-color=white outlined dense clearable rounded").classes("flex-grow").on('keydown.enter', lambda: self.handle_user_input(self.chat_input_field.value), throttle=0.5) + # ui.button(icon="send", on_click=lambda: self.handle_user_input(self.chat_input_field.value)).props("round color=indigo-6 dense unelevated") + # ui.label("This agent can make mistakes, so double-check it.") \ + # .classes("w-full text-xs text-gray-600 p-2 text-center bg-slate-100 border-t flex-shrink-0") + # with main_splitter.after: + # with ui.column().classes("w-full h-full items-stretch overflow-y-auto bg-slate-50"): # This makes the whole right pane scroll + # ui.label("Details & Preview").classes("text-md font-semibold mb-2 text-gray-700 sticky top-0 bg-slate-100/95 backdrop-blur-sm z-10 p-3 border-b shadow-sm") + # with ui.column().classes("p-2 md:p-3 flex-grow"): # Content area below sticky header + + # self.details_container = ui.column().classes("w-full flex-grow p-2 border rounded-lg bg-white shadow mt-2 
min-h-[200px]") + # self.dataset_preview_area = ui.column().classes("w-full mb-3 p-2 border rounded-lg bg-white shadow") + + # app.on_connect(self.on_page_load_actions) + # app.on_disconnect(self.on_page_unload_actions) + + + + def build_ui(self): + ui.add_head_html(""" + + """) + self.left_drawer = ui.left_drawer(elevated=True, top_corner=True, bottom_corner=True)\ + .props('overlay breakpoint=lg').style('background-color: #f4f6f8;')\ + .classes('p-4 w-80 lg:w-96 border-r') + + # Header with menu button on the right (as per your provided code) + with ui.header(elevated=True).style('background-color: #303f9f;').classes('items-center text-white q-px-md'): + if self.left_drawer: + ui.button(icon='menu', on_click=self.left_drawer.toggle).props('flat round color=white') + + ui.label("Galaxy Chat Analysis").classes("text-xl md:text-2xl font-semibold tracking-wide") + + with self.left_drawer: + # ---- MODIFIED: Added Row for Title and Close Button ---- + with ui.row().classes("w-full items-center justify-between no-wrap mb-2"): # Use mb-2 here + ui.label("Configuration").classes("text-lg font-semibold text-indigo-800") # Removed mb-3 + ui.button(icon='close', on_click=lambda: setattr(self.left_drawer, 'value', False)) \ + .props('flat round dense color=grey-7').tooltip("Close Sidebar") + # ---- END MODIFICATION ---- + + self.sidebar_api_status_label = ui.label("Agent: Unknown").classes("mb-3 text-xs p-1 rounded") + self.model_select_element = ui.select(self.MODEL_OPTIONS_SELECT, label="LLM Model", value=self.selected_model_id, on_change=self.handle_model_change).props("outlined dense emit-value map-options").classes("w-full mb-3") + with ui.expansion("API Keys", icon="key", value=False).classes("w-full mb-3 text-sm"): + self.openai_key_input = ui.input(label="OpenAI API Key", password=True, value=self.openai_api_key, on_change=lambda e: setattr(self, 'openai_api_key', e.value)).props("dense outlined clearable") + ui.button("Save OpenAI", on_click=self.save_openai_key, 
icon="save").classes("w-full mt-1").props("color=indigo-6 dense size=sm") + self.groq_key_input = ui.input(label="Groq API Key", password=True, value=self.groq_api_key, on_change=lambda e: setattr(self, 'groq_api_key', e.value)).props("dense outlined clearable mt-2") + ui.button("Save Groq", on_click=self.save_groq_key, icon="save").classes("w-full mt-1").props("color=indigo-6 dense size=sm") + + ui.separator().classes("my-3") + ui.label("Dataset").classes("text-md font-semibold mb-2 text-indigo-700") + ui.upload(label="Upload New Dataset", auto_upload=False, on_upload=self.handle_upload, max_file_size=200 * 1024 * 1024).props("accept=.csv,.tsv,.h5ad,.xlsx,.xls,.json,.parquet,.h5,.fa,.fasta,.vcf,.gtf,.gff,.bed").classes("w-full mb-3") + + ui.label("Analysis Actions").classes("text-md font-semibold mt-3 mb-2 text-indigo-700") + ui.button("Run Full EDA", on_click=self.run_eda_action, icon="query_stats").classes("w-full mb-1").props("color=deep-purple-6 dense") + ui.separator().classes("my-3") + + with ui.expansion("⭐ Bookmarks", icon="bookmarks", value=True).classes("w-full text-sm"): + # ---- MODIFIED: max-h-100 to max-h-96 (standard Tailwind class) ---- + self.bookmarks_container = ui.column().classes("w-full max-h-96 overflow-y-auto gap-1") + self.update_sidebar_bookmarks() + + with ui.splitter(value=60, reverse=False, limits=(30,70)).classes('w-full h-[calc(100vh-110px)] no-wrap') as main_splitter: + with main_splitter.before: + # ---- MODIFIED: Corrected classes for flex layout ---- + # Replaced repeated 'items-stretch' with 'flex flex-col' and added 'min-h-0' + with ui.column().classes("w-full h-full p-0 flex flex-col no-wrap items-stretch overflow-hidden min-h-0"): + # ---- MODIFIED: Added min-h-0 to chat_container ---- + self.chat_container = ui.column().classes("w-full flex-grow overflow-y-auto p-2 md:p-3 bg-gray-100 min-h-0") + + # Chat input row - added flex-shrink-0 for stability + with ui.row().classes("w-full p-2 bg-slate-200 items-center border-t 
flex-shrink-0"): + self.chat_input_field = ui.input(placeholder="Ask about the dataset...")\ + .props("bg-color=white outlined dense clearable rounded").classes("flex-grow")\ + .on('keydown.enter', lambda: self.handle_user_input(self.chat_input_field.value), throttle=0.5) + ui.button(icon="send", on_click=lambda: self.handle_user_input(self.chat_input_field.value))\ + .props("round color=indigo-6 dense unelevated") + + # ---- MODIFIED: Disclaimer moved out of the input row to be below it ---- + ui.label("This agent can make mistakes, so double-check it.") \ + .classes("w-full text-xs text-gray-600 p-2 text-center bg-slate-100 border-t flex-shrink-0") + + with main_splitter.after: + # This structure makes the whole right pane scroll, as per your provided version. + # The "Details & Preview" label will scroll with it. + with ui.column().classes("w-full h-full items-stretch overflow-y-auto bg-slate-50"): + ui.label("Details & Preview").classes("text-md font-semibold mb-2 text-gray-700 sticky top-0 bg-slate-100/95 backdrop-blur-sm z-10 p-3 border-b shadow-sm") + with ui.column().classes("p-2 md:p-3 flex-grow"): + # Order: Details then Dataset Preview (as per earlier request) + # Keeping your margins, but if details_container is first, mt-2 might not be needed. + self.details_container = ui.column().classes("w-full flex-grow p-2 border rounded-lg bg-white shadow mt-2 min-h-[200px]") + self.dataset_preview_area = ui.column().classes("w-full mb-3 p-2 border rounded-lg bg-white shadow") + + app.on_connect(self.on_page_load_actions) + app.on_disconnect(self.on_page_unload_actions) + + async def on_page_load_actions(self, client: Client): + logging.info(f"Client connected (User: {self.user_id}). 
Loading initial actions.") + dataset_loaded = False + if self.initial_dataset_path_from_arg and self.initial_dataset_path_from_arg.exists(): + self.current_dataset_file_path = self.initial_dataset_path_from_arg; self.current_dataset_display_name = self.current_dataset_file_path.name + self.current_input_data_type = self.cli_args.input_data_type + ui.notify(f"Loading dataset from arg: {self.current_dataset_display_name}", type='info', timeout=2000) + await self.preview_loaded_or_uploaded_dataset(); dataset_loaded = True + elif self.current_dataset_file_path and self.current_dataset_file_path.exists(): + ui.notify(f"Restoring session with: {self.current_dataset_display_name}", type='info', timeout=2000) + await self.preview_loaded_or_uploaded_dataset(); dataset_loaded = True + if not dataset_loaded and self.dataset_preview_area: + self.dataset_preview_area.clear(); + with self.dataset_preview_area: ui.label("Upload dataset or provide via CLI to start.").classes("text-gray-500 m-2") + self.update_chat_display(); self.update_details_pane(); self.update_sidebar_bookmarks(); self.try_initialize_agent() + + def on_page_unload_actions(self, client: Client): + logging.info(f"Client disconnected (User: {self.user_id}). Saving history.") + self.save_chat_history() + + def handle_model_change(self, e): + self.selected_model_id = e.value + self.selected_model_name = self.MODEL_OPTIONS_SELECT.get(self.selected_model_id, self.selected_model_id) + ui.notify(f"Model set to: {self.selected_model_name}", type='info', position='top-right', timeout=2000) + self.try_initialize_agent() + + def save_openai_key(self): + if self.openai_key_input: + self.openai_api_key = self.openai_key_input.value or "" + save_key_to_specific_file(OPENAI_API_KEY_FILE, self.openai_api_key) + ui.notify("OpenAI Key " + ("saved." 
if self.openai_api_key else "cleared."), type='positive' if self.openai_api_key else 'info') + self.try_initialize_agent() + + def save_groq_key(self): + if self.groq_key_input: + self.groq_api_key = self.groq_key_input.value or "" + save_key_to_specific_file(GROQ_API_KEY_FILE, self.groq_api_key) + ui.notify("Groq Key " + ("saved." if self.groq_api_key else "cleared."), type='positive' if self.groq_api_key else 'info') + self.try_initialize_agent() + + async def run_eda_action(self): + if not self.agent or not self.current_dataset_file_path: + ui.notify("Agent or dataset not ready for EDA.", type='warning'); return + eda_user_query = ("Perform a comprehensive EDA: summary statistics, missing values, data types, " + "correlation matrix, distributions for numerical, counts for categorical. Conclude with 3-5 insights.") + ui.notify("Starting EDA...", type='info') + await self.handle_user_input(eda_user_query) + + +# --- CLI Argument Parsing & App Run --- +if __name__ in {"__main__", "__mp_main__"}: + parser = argparse.ArgumentParser(description="Galaxy Chat Analysis with NiceGUI") + parser.add_argument("--user_id", nargs='?', default=f"user_{uuid.uuid4().hex[:6]}", help="User ID (defaults to a random ID).") + parser.add_argument("--openai_key_file", dest="cli_openai_key_file_path", help="Path to OpenAI API key file.") + parser.add_argument("--groq_key_file", dest="cli_groq_key_file_path", help="Path to Groq API key file.") + parser.add_argument("--chat_history", dest="chat_history_path", default=str(DEFAULT_CHAT_HISTORY_FILE), help="Path to chat history JSON file.") + parser.add_argument("--output_dir", dest="generate_file_path", default=str(DEFAULT_OUTPUT_DIR), help="Directory for generated files (plots, data).") + parser.add_argument("--input_file", dest="input_file_path", help="Path to an initial dataset file to load.") + parser.add_argument("--input_type", dest="input_data_type", default="csv", help="Type of the initial dataset file (e.g., csv, tsv, h5ad).") + 
cli_args = parser.parse_args() + + app_instance = NiceGuiApp(user_id=cli_args.user_id, cli_args_ns=cli_args) + + @ui.page('/') + def main_page_entry(client: Client): + app_instance.build_ui() + + ui.run(title="Galaxy Chat Analysis", storage_secret=str(uuid.uuid4()), + port=8090, reload=os.environ.get('NICEGUI_RELOAD', 'true').lower() != 'false', + uvicorn_logging_level='info', dark=False, + favicon=SCRIPT_PATH / "favicon.ico") # Changed logging to info for more Uvicorn details if needed during dev \ No newline at end of file diff --git a/pages/1_Bookmarks.py b/smolagents_agent/pages/1_Bookmarks.py similarity index 100% rename from pages/1_Bookmarks.py rename to smolagents_agent/pages/1_Bookmarks.py diff --git a/smolagents_agent/prompt.py b/smolagents_agent/prompt.py new file mode 100644 index 0000000..579dc5a --- /dev/null +++ b/smolagents_agent/prompt.py @@ -0,0 +1,135 @@ +# prompts.py + +CODE_AGENT_SYSTEM_PROMPT = """You are an expert data scientist who can solve any task using code blobs. You will be given a task to solve as best you can. +To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code. +To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences. + +At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use. +Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '' sequence. +During each intermediate step, you can use 'print()' to save whatever important information you will then need. +These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step. + +**Important Output Requirements:** +When providing the final solution using the `final_answer` tool, you MUST structure your argument as a Python dictionary. 
This dictionary must have the following keys: +- `explanation`: A list of strings, where each string is a sentence or paragraph explaining your findings or the solution. +- `plots`: A list of strings, where each string is a path to a plot image file you generated. If no plots, provide an empty list. +- `files`: A list of strings, for paths to other data files you generated (e.g., CSVs). If no files, provide an empty list. +- `next_steps_suggestion`: A list of strings, offering 2-3 relevant follow-up questions the user might ask based on your findings. + +All generated files (plots, data files) MUST be saved in the `outputs_dir/` directory (e.g., `outputs_dir/my_plot_1a2b3c.png`). Ensure filenames are descriptive and unique. + +Here is an example of how to perform a data analysis task: +--- +Task: "Perform a comprehensive EDA on `dataset.csv`, including summary statistics, correlation (with heatmap), and distributions of numerical features. Save plots to `outputs_dir/` and provide insights." + +Thought: I will load the data using pandas, then calculate summary statistics. After that, I will compute the correlation matrix and generate a heatmap, saving it to `outputs_dir/`. Next, I will iterate through numerical features, plot their distributions, and save these plots also to `outputs_dir/`. I'll use `print()` to output the paths of the saved plots so they are available in the observation. Finally, I will compile all findings and paths into the specified dictionary structure and use the `final_answer` tool. +Code: +```py +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +import numpy as np + +# Assume outputs_dir is a predefined path for saving files. +# In a real scenario, this often comes from the environment or task setup. +outputs_dir = "outputs_dir" # For example purposes + +df = pd.read_csv("dataset.csv") # Replace with actual dataset path if different + +# 1. 
Summary statistics +summary_stats = df.describe() +print("--- Summary Statistics ---") +print(summary_stats) + +# 2. Correlation matrix and heatmap +# Ensure only numeric columns are used for correlation +numeric_df = df.select_dtypes(include=[np.number]) +correlation_matrix = numeric_df.corr() + +plt.figure(figsize=(12, 10)) +sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f") +plt.title('Correlation Heatmap of Numerical Features') +heatmap_file_path = f"{outputs_dir}/correlation_heatmap_example.png" +plt.savefig(heatmap_file_path) +plt.close() # Close plot to free memory +print(f"heatmap_path={heatmap_file_path}") + +# 3. Distributions of numerical features +numerical_features = numeric_df.columns +numerical_plot_paths = [] +for column in numerical_features: + plt.figure(figsize=(8, 5)) + sns.histplot(df[column].dropna(), kde=True, bins=30) + plt.title(f'Distribution of {column}') + plot_file_path = f"{outputs_dir}/{column}_distribution_example.png" + plt.savefig(plot_file_path) + plt.close() # Close plot + numerical_plot_paths.append(plot_file_path) +print(f"numerical_plot_paths={numerical_plot_paths}") +``` +Observation: +--- Summary Statistics --- + Feature1 Feature2 +count 100.000000 100.000000 +mean 50.000000 25.000000 +std 10.000000 5.000000 +... (truncated print output for summary_stats) +heatmap_path=outputs_dir/correlation_heatmap_example.png +numerical_plot_paths=['outputs_dir/Feature1_distribution_example.png', 'outputs_dir/Feature2_distribution_example.png'] + +Thought: The EDA steps are complete. I have generated summary statistics, a correlation heatmap, and distribution plots for numerical features. The paths to the saved plots (`heatmap_file_path` and `numerical_plot_paths`) are available from the previous step's code execution (as per rule 9) and confirmed in the observation. I will now use these to structure the final answer. 
+Code: +```py +# The variables heatmap_file_path and numerical_plot_paths were defined in the previous code block +# and persist due to rule 9. Their values were also printed in the observation. + +final_insights = { + "explanation": [ + "Comprehensive EDA has been performed on the dataset.", + "Summary statistics reveal the basic distribution of data.", + "The correlation heatmap shows relationships between numerical features.", + "Distribution plots for numerical features like 'Feature1' and 'Feature2' have been generated." + ], + "plots": [heatmap_file_path] + numerical_plot_paths, + "files": [], # No other data files were generated in this example + "next_steps_suggestion": [ + "How do these features relate to a specific target variable?", + "Are there any outliers that need further investigation or treatment?", + "What are the characteristics of categorical features in this dataset?" + ] +} +final_answer(final_insights) +``` + +Above example was specific to data analysis. On top of performing computations in the Python code snippets that you create, you only have access to these tools: +{%- for tool in tools.values() %} +- {{ tool.name }}: {{ tool.description }} + Takes inputs: {{tool.inputs}} + Returns an output of type: {{tool.output_type}} +{%- endfor %} + +{%- if managed_agents and managed_agents.values() | list %} +You can also give tasks to team members. +Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task. +Given that this team member is a real human, you should be very verbose in your task. +Here is a list of the team members that you can call: +{%- for agent in managed_agents.values() %} +- {{ agent.name }}: {{ agent.description }} +{%- endfor %} +{%- else %} +{%- endif %} + +Here are the rules you should always follow to solve your task: +1. 
Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```' sequence, else you will fail. +2. Use only variables that you have defined! +3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in 'answer = wiki({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = wiki(query="What is the place where James Bond lives?")'. +4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block. +5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters. +6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'. +7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables. +8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}} +9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist. +10. Don't give up! You're in charge of solving the task, not providing directions to solve it. + +Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. 
+""" \ No newline at end of file diff --git a/requirements.txt b/smolagents_agent/requirements.txt similarity index 100% rename from requirements.txt rename to smolagents_agent/requirements.txt diff --git a/smolagents_agent/requirements_nicegui.txt b/smolagents_agent/requirements_nicegui.txt new file mode 100644 index 0000000..8ef699b --- /dev/null +++ b/smolagents_agent/requirements_nicegui.txt @@ -0,0 +1,23 @@ +nicegui +pandas +python-dotenv +smolagents[litellm]==1.9.2 +scikit-learn +Pillow +numpy>=1.24.0 +matplotlib +seaborn +scipy +plotly +joblib +anndata +openpyxl +pyarrow +tables +biopython +pysam +gffutils +xgboost +lightgbm +catboost +psycopg2-binary \ No newline at end of file diff --git a/tools/chat_analysis.xml b/tools/chat_analysis.xml index 8786a80..b56c576 100644 --- a/tools/chat_analysis.xml +++ b/tools/chat_analysis.xml @@ -29,7 +29,7 @@ echo "$input_dataset.ext" && ln -sf $__tool_directory__/.env /ChatAnalysis/.env && python /ChatAnalysis/chat_dspy.py - --user_id "$__user_email__" + --user_id "$__user_name__" --openai_key_file "$openai_api_key_file" --groq_key_file "$groq_api_key_file" --chat_history "$chat_history" @@ -54,7 +54,7 @@ - + From 172c58503d8bdd37b3c126ac949277437076ee1a Mon Sep 17 00:00:00 2001 From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com> Date: Fri, 20 Jun 2025 18:47:35 -0400 Subject: [PATCH 3/5] move two files to smolagents agent folder --- helper.py => smolagents_agent/helper.py | 0 smolagents_da.py => smolagents_agent/smolagents_da.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename helper.py => smolagents_agent/helper.py (100%) rename smolagents_da.py => smolagents_agent/smolagents_da.py (100%) diff --git a/helper.py b/smolagents_agent/helper.py similarity index 100% rename from helper.py rename to smolagents_agent/helper.py diff --git a/smolagents_da.py b/smolagents_agent/smolagents_da.py similarity index 100% rename from smolagents_da.py rename to smolagents_agent/smolagents_da.py From 
96832428f04769fbc1b644e99c80ec8768500f43 Mon Sep 17 00:00:00 2001 From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com> Date: Fri, 20 Jun 2025 18:48:27 -0400 Subject: [PATCH 4/5] move one file to pandasai agent folder --- generate_html_report.py => pandasai_agent/generate_html_report.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename generate_html_report.py => pandasai_agent/generate_html_report.py (100%) diff --git a/generate_html_report.py b/pandasai_agent/generate_html_report.py similarity index 100% rename from generate_html_report.py rename to pandasai_agent/generate_html_report.py From d4b9d78c195fd5e4c0a2c3aa996cca572e24021a Mon Sep 17 00:00:00 2001 From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com> Date: Sat, 21 Jun 2025 01:16:29 -0400 Subject: [PATCH 5/5] icons and changes --- .gitignore | 1 + dspy_agent/Dockerfile | 19 +- dspy_agent/chat_dspy.py | 265 ++++++++++++++++------- dspy_agent/requirements_nicegui_dspy.txt | 4 +- dspy_agent/static/agent.png | Bin 0 -> 12842 bytes dspy_agent/static/user.png | Bin 0 -> 16763 bytes pandasai_agent/chat_analysis.py | 4 +- tools/chat_analysis.xml | 7 +- 8 files changed, 208 insertions(+), 92 deletions(-) create mode 100644 dspy_agent/static/agent.png create mode 100644 dspy_agent/static/user.png diff --git a/.gitignore b/.gitignore index a028562..8b29dff 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ user_config_* test*.py test*.xml dspy_agent.py +!dspy_agent/static/*.png diff --git a/dspy_agent/Dockerfile b/dspy_agent/Dockerfile index 1d69dd5..fa39d49 100644 --- a/dspy_agent/Dockerfile +++ b/dspy_agent/Dockerfile @@ -1,6 +1,9 @@ FROM python:3.10-slim -# Install system dependencies (including libc6-dev for additional headers) +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +# Install system-level dependencies required for popular Python data science and plotting libraries RUN apt-get update && apt-get install -y \ build-essential \ gfortran \ @@ -18,21 +21,15 @@ 
RUN apt-get update && apt-get install -y \ libpq-dev \ && rm -rf /var/lib/apt/lists/* -# Set working directory + WORKDIR /ChatAnalysis -ENV STREAMLIT_HOME=/ChatAnalysis/.streamlit -# Copy all project files into the container -COPY . . +COPY dspy_agent/requirements_nicegui_dspy.txt . -# Upgrade pip to get the latest binary wheels RUN pip install --no-cache-dir --upgrade pip - -# Install Python dependencies from requirements.txt RUN pip install --no-cache-dir -r requirements_nicegui_dspy.txt -EXPOSE 9090 +COPY . . -# Adjust permissions if needed -RUN chmod -R 777 /ChatAnalysis \ No newline at end of file +EXPOSE 9090 diff --git a/dspy_agent/chat_dspy.py b/dspy_agent/chat_dspy.py index 4c6d5c4..f8d99e8 100644 --- a/dspy_agent/chat_dspy.py +++ b/dspy_agent/chat_dspy.py @@ -26,25 +26,53 @@ SCRIPT_PATH = Path(__file__).resolve().parent +# try: +# APP_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + +# dspy_cache_path = APP_OUTPUT_DIR / ".dspy_cache" +# dspy_cache_path.mkdir(parents=True, exist_ok=True) +# os.environ["DSPY_CACHEDIR"] = str(dspy_cache_path.resolve()) + +# matplotlib_cache_path = APP_OUTPUT_DIR / ".matplotlib_cache" +# matplotlib_cache_path.mkdir(parents=True, exist_ok=True) +# os.environ["MPLCONFIGDIR"] = str(matplotlib_cache_path.resolve()) + +# logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s') +# logging.info(f"SCRIPT CWD when starting: {Path.cwd()}") +# logging.info(f"APP_OUTPUT_DIR resolved to: {APP_OUTPUT_DIR.resolve()}") +# logging.info(f"DSPY_CACHE_DIR set to: {os.environ['DSPY_CACHE_DIR']}") +# logging.info(f"MPLCONFIGDIR set to: {os.environ['MPLCONFIGDIR']}") + +# except Exception as e: +# print(f"ERROR during initial cache path setup: {e}", file=sys.stderr) + try: APP_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + # --- DSPy Cache Setup --- dspy_cache_path = APP_OUTPUT_DIR / ".dspy_cache" dspy_cache_path.mkdir(parents=True, exist_ok=True) - os.environ["DSPY_CACHEDIR"] = 
str(dspy_cache_path.resolve()) + dspy_cache_path_str = str(dspy_cache_path.resolve()) + os.environ["DSPY_CACHEDIR"] = dspy_cache_path_str + # --- Matplotlib Cache Setup --- matplotlib_cache_path = APP_OUTPUT_DIR / ".matplotlib_cache" matplotlib_cache_path.mkdir(parents=True, exist_ok=True) - os.environ["MPLCONFIGDIR"] = str(matplotlib_cache_path.resolve()) + matplotlib_cache_path_str = str(matplotlib_cache_path.resolve()) + os.environ["MPLCONFIGDIR"] = matplotlib_cache_path_str + # --- Logging Setup --- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s') logging.info(f"SCRIPT CWD when starting: {Path.cwd()}") logging.info(f"APP_OUTPUT_DIR resolved to: {APP_OUTPUT_DIR.resolve()}") - logging.info(f"DSPY_CACHE_DIR set to: {os.environ['DSPY_CACHE_DIR']}") - logging.info(f"MPLCONFIGDIR set to: {os.environ['MPLCONFIGDIR']}") + # Use the local variable for logging, which is safer. + logging.info(f"DSPY_CACHE_DIR set to: {dspy_cache_path_str}") + logging.info(f"MPLCONFIGDIR set to: {matplotlib_cache_path_str}") except Exception as e: - print(f"ERROR during initial cache path setup: {e}", file=sys.stderr) + # Improved error printing to give more context if another error occurs + print(f"ERROR during initial cache path setup: {type(e).__name__}: {e}", file=sys.stderr) + import dspy @@ -91,9 +119,25 @@ AUTHORIZED_MODULES_FOR_CODE_TOOL = [ "pandas", "numpy", "matplotlib.pyplot", "seaborn", "scipy.stats", "pathlib", "io", "sklearn", "autogluon", "random", "joblib", "openpyxl", - "anndata", "Bio", "vcf", "statsmodels", "plotly", + "anndata", "Bio", "vcf", "statsmodels", "plotly", "itertools", "collections", "json", ] +class FinishTool(dspy.Tool): + """A dummy tool that signals the end of the interaction and provides the final answer.""" + name = "finish" + input_variable = "final_answer" + output_variable = "text" + description = "Use this action to end the interaction and provide the final answer." 
+ + def __init__(self): + # This line registers the __call__ method as the function to run for this tool. + super().__init__(func=self.__call__) + + def __call__(self, final_answer: str) -> str: + # This tool doesn't need to do anything. Its only job is + # to exist and have the correct signature for validation. + return "Final answer received." + class PythonCodeTool(dspy.Tool): name = "python_code_executor" input_variable = "code" @@ -250,6 +294,13 @@ class DataAnalysisSignature(dspy.Signature): **IMPORTANT: To prevent file conflicts, all generated file and plot names MUST end with a unique suffix (e.g., a short random string or number). For example, save 'plot.png' as 'plot_a8d3.png'.** + When you have gathered all the necessary information and are ready to provide the final answer, + you MUST use the special 'finish' action. The 'finish' action requires a single argument: 'final_answer'. + The value for 'answer' MUST be a single, valid JSON string. + Here is a literal example of the final step: + Thought: I have collected all the results and I am ready to provide the final answer. + Action: finish(answer='{"explanation": "The analysis is complete.", "plots": ["generated_files/plot1.png"], "files": ["generated_files/data.csv"], "next_steps_suggestion": ["Consider further analysis."]}') + Finally, provide a comprehensive answer to the user in JSON format. This JSON MUST include: - "explanation": A textual explanation of what was done and the insights. - "plots": A list of relative paths (from 'outputs_dir') to any generated plot image files. rember to return the paths for all plots generated. 
@@ -266,7 +317,8 @@ def __init__(self, outputs_dir: Path, current_dataset_path: Path | None, max_ite super().__init__() self.react_agent = dspy.ReAct( DataAnalysisSignature, - tools=[PythonCodeTool(outputs_dir=outputs_dir, current_dataset_path=current_dataset_path)], + tools=[PythonCodeTool(outputs_dir=outputs_dir, current_dataset_path=current_dataset_path), + FinishTool()], max_iters=max_iters ) @@ -339,7 +391,7 @@ def load_examples_from_json(json_file_path: Path) -> list[dspy.Example]: question=item.get("question"), context=item.get("context"), rationale=item.get("rationale"), - final_answer=item.get("final_answer") # This is a JSON string + answer=item.get("answer") # This is a JSON string ).with_inputs("question", "context") examples.append(example) logging.info(f"Loaded {len(examples)} examples from {json_file_path}") @@ -352,7 +404,7 @@ def validation_metric(example: dspy.Example, prediction: dspy.Prediction, trace= to-do: improve this validation metric to be more robust. """ try: - pred_dict = json.loads(prediction.final_answer) + pred_dict = json.loads(prediction.answer) # Basic check: explanation exists and is non-empty if "explanation" not in pred_dict or not pred_dict["explanation"]: @@ -370,10 +422,10 @@ def validation_metric(example: dspy.Example, prediction: dspy.Prediction, trace= return True except json.JSONDecodeError: - logging.debug(f"Validation Fail: Prediction not valid JSON. Pred: {str(prediction.final_answer)[:200]}") + logging.debug(f"Validation Fail: Prediction not valid JSON. Pred: {str(prediction.answer)[:200]}") return False except Exception as e: - logging.error(f"Validation Metric Error: {e}. Pred: {str(prediction.final_answer)[:200]}", exc_info=True) + logging.error(f"Validation Metric Error: {e}. 
Pred: {str(prediction.answer)[:200]}", exc_info=True) return False @@ -458,9 +510,10 @@ def __init__(self, user_id: str, cli_args_ns: argparse.Namespace): self.memory = deque(maxlen=30) # Keep conversation history for context self.bookmarks = [] - self.current_dataset_file_path: Path | None = None - self.current_dataset_display_name = "No dataset loaded" - self.current_input_data_type = self.cli_args.input_data_type + # self.current_dataset_file_path: Path | None = None + # self.current_dataset_display_name = "No dataset loaded" + # self.current_input_data_type = self.cli_args.input_data_type + self.current_dataset: dict | None = None self.current_data_object = None # Loaded data (e.g., DataFrame) self.summary_stats_csv_path: Path | None = None # For pandas summaries self.eda_report_path: Path | None = None # Not used with DSPy agent directly unless agent creates it @@ -493,6 +546,21 @@ def __init__(self, user_id: str, cli_args_ns: argparse.Namespace): self.compile_dspy_agent_on_startup = self.cli_args.compile_dspy_agent # From CLI self.load_initial_state() + + @property + def current_dataset_file_path(self) -> Path | None: + """Safely gets the file path from the current dataset dictionary.""" + return self.current_dataset.get('path') if self.current_dataset else None + + @property + def current_dataset_display_name(self) -> str: + """Safely gets the display name from the current dataset dictionary.""" + return self.current_dataset.get('display_name', 'No dataset loaded') if self.current_dataset else 'No dataset loaded' + + @property + def current_input_data_type(self) -> str | None: + """Safely gets the data type from the current dataset dictionary.""" + return self.current_dataset.get('type') if self.current_dataset else None def load_initial_state(self): cli_openai_path_str = self.cli_args.cli_openai_key_file_path @@ -520,18 +588,36 @@ def load_initial_state(self): self.memory = deque(history.get("memory", []), maxlen=30) # Restore memory self.bookmarks = 
history.get("bookmarks", []) + # saved_dataset_path_str = history.get("analysis_file_path") + # if saved_dataset_path_str: + # saved_dataset_path = Path(saved_dataset_path_str) + # if saved_dataset_path.exists(): + # self.current_dataset_file_path = saved_dataset_path + # self.current_dataset_display_name = self.current_dataset_file_path.name + # self.current_input_data_type = history.get("input_data_type", self.current_input_data_type) saved_dataset_path_str = history.get("analysis_file_path") if saved_dataset_path_str: saved_dataset_path = Path(saved_dataset_path_str) if saved_dataset_path.exists(): - self.current_dataset_file_path = saved_dataset_path - self.current_dataset_display_name = self.current_dataset_file_path.name - self.current_input_data_type = history.get("input_data_type", self.current_input_data_type) + # This now correctly sets the single 'source of truth' dictionary + self.current_dataset = { + "path": saved_dataset_path, + "display_name": saved_dataset_path.name, + "type": history.get("input_data_type", "csv") + } + # if self.initial_dataset_path_from_arg and self.initial_dataset_path_from_arg.exists(): + # self.current_dataset_file_path = self.initial_dataset_path_from_arg + # self.current_dataset_display_name = self.current_dataset_file_path.name + # self.current_input_data_type = self.cli_args.input_data_type + if self.initial_dataset_path_from_arg and self.initial_dataset_path_from_arg.exists(): - self.current_dataset_file_path = self.initial_dataset_path_from_arg - self.current_dataset_display_name = self.current_dataset_file_path.name - self.current_input_data_type = self.cli_args.input_data_type + display_name = self.cli_args.file_name or self.initial_dataset_path_from_arg.name + self.current_dataset = { + "path": self.initial_dataset_path_from_arg, + "display_name": display_name, + "type": self.cli_args.input_data_type, + } summary_path_str = history.get("summary_stats_csv_path"); eda_path_str = history.get("eda_report_path") if 
summary_path_str and Path(summary_path_str).exists(): self.summary_stats_csv_path = Path(summary_path_str) @@ -541,9 +627,9 @@ def load_initial_state(self): for msg in self.messages: if msg.get("role") == "assistant" and msg.get("timestamp") in bookmarked_message_timestamps: msg['bookmarked'] = True - if msg.get("role") == "assistant": - # any existing cost will be kept; missing ones default to None - msg.setdefault("cost", None) + # if msg.get("role") == "assistant": + # # any existing cost will be kept; missing ones default to None + # msg.setdefault("cost", None) logging.info(f"Chat history loaded from {self.chat_history_file_path}") except Exception as e: logging.error(f"Error loading chat history: {e}", exc_info=True) @@ -712,8 +798,10 @@ async def handle_user_input(self, user_question: str | None): dataset_info_str = "No dataset currently loaded." if self.current_dataset_file_path: dataset_info_str = ( - f"Current dataset path for tool: '{self.current_dataset_file_path}'.\n" - f"Dataset type: '{self.current_input_data_type}'.\n" + # f"Current dataset path for tool: '{self.current_dataset_file_path}'.\n" + # f"Dataset type: '{self.current_input_data_type}'.\n" + f"Current dataset path for tool: '{self.current_dataset['path']}'.\n" + f"Dataset type: '{self.current_dataset['type']}'.\n" f"Agent's output directory for saving files: '{self.outputs_dir}'. 
" f"Tool must save generated files (plots, CSVs) into 'outputs_dir / \"{AGENT_GENERATED_FILES_SUBDIR.name}\"/' " f"(e.g., outputs_dir / \"{AGENT_GENERATED_FILES_SUBDIR.name}/plot.png\").\n" @@ -747,7 +835,7 @@ async def handle_user_input(self, user_question: str | None): break # Run the DSPy agent in a separate thread - with dspy.context(track_usage=True): + with dspy.context(): prediction = await asyncio.to_thread( self.dspy_agent, question=user_question, context=agent_context ) @@ -762,37 +850,37 @@ async def handle_user_input(self, user_question: str | None): # completion_cost = usage_data.completion_tokens * pricing.get("completion", 0) # total_cost = prompt_cost + completion_cost - logging.info("Calculating cost by manually aggregating from lm.history...") - total_prompt_tokens = 0 - total_completion_tokens = 0 - cost_calculated = False - - # The history is on the configured language model object itself. - lm_history = dspy.settings.lm.history if hasattr(dspy.settings.lm, 'history') else [] - - if lm_history: - for api_call in lm_history: - # According to the documentation, 'usage' is a direct key in each history entry. 
- usage_data = api_call.get('usage') - if usage_data: - prompt_tokens = usage_data.get("prompt_tokens", 0) - completion_tokens = usage_data.get("completion_tokens", 0) - total_prompt_tokens += prompt_tokens - total_completion_tokens += completion_tokens - - if total_prompt_tokens > 0 or total_completion_tokens > 0: - pricing = MODEL_PRICING.get(self.selected_model_id, {}) - prompt_cost = total_prompt_tokens * pricing.get("prompt", 0) - completion_cost = total_completion_tokens * pricing.get("completion", 0) - total_cost = prompt_cost + completion_cost - logging.info( - f"SUCCESS: Final cost is ${total_cost:.6f} from " - f"({total_prompt_tokens} prompt + {total_completion_tokens} completion tokens)" - ) - cost_calculated = True - - if not cost_calculated: - logging.error("FAILURE: No usage data was found in any lm.history entries after the call.") + # logging.info("Calculating cost by manually aggregating from lm.history...") + # total_prompt_tokens = 0 + # total_completion_tokens = 0 + # cost_calculated = False + + # # The history is on the configured language model object itself. + # lm_history = dspy.settings.lm.history if hasattr(dspy.settings.lm, 'history') else [] + + # if lm_history: + # for api_call in lm_history: + # # According to the documentation, 'usage' is a direct key in each history entry. 
+ # usage_data = api_call.get('usage') + # if usage_data: + # prompt_tokens = usage_data.get("prompt_tokens", 0) + # completion_tokens = usage_data.get("completion_tokens", 0) + # total_prompt_tokens += prompt_tokens + # total_completion_tokens += completion_tokens + + # if total_prompt_tokens > 0 or total_completion_tokens > 0: + # pricing = MODEL_PRICING.get(self.selected_model_id, {}) + # prompt_cost = total_prompt_tokens * pricing.get("prompt", 0) + # completion_cost = total_completion_tokens * pricing.get("completion", 0) + # total_cost = prompt_cost + completion_cost + # logging.info( + # f"SUCCESS: Final cost is ${total_cost:.6f} from " + # f"({total_prompt_tokens} prompt + {total_completion_tokens} completion tokens)" + # ) + # cost_calculated = True + + # if not cost_calculated: + # logging.error("FAILURE: No usage data was found in any lm.history entries after the call.") # --- Enhanced Debugging for Prediction and Trajectory --- @@ -840,15 +928,15 @@ async def handle_user_input(self, user_question: str | None): formatted_middle_steps = self.format_raw_middle_steps_for_display(trajectory_data) - if prediction and hasattr(prediction, 'final_answer') and prediction.final_answer: - parsed_response_dict = self.parse_response_content_for_nicegui(prediction.final_answer) - elif prediction: # If no final_answer but prediction exists - logging.warning("Prediction object does not have 'final_answer' attribute or it's empty. Using str(prediction) as explanation.") + if prediction and hasattr(prediction, 'answer') and prediction.answer: + parsed_response_dict = self.parse_response_content_for_nicegui(prediction.answer) + elif prediction: # If no answer but prediction exists + logging.warning("Prediction object does not have 'answer' attribute or it's empty. 
Using str(prediction) as explanation.") # The prediction itself might be the string output if the signature wasn't fully adhered to parsed_response_dict = self.parse_response_content_for_nicegui(str(prediction)) else: # Prediction is None or no useful content parsed_response_dict = {"explanation": "Agent did not return a valid response.", "plots": [], "files": [], "next_steps_suggestion": []} - logging.error("Agent did not return a usable response (prediction is None or lacks final_answer).") + logging.error("Agent did not return a usable response (prediction is None or lacks answer).") except Exception as e: logging.error(f"Error during DSPy agent interaction for '{user_question}': {e}", exc_info=True) @@ -914,7 +1002,7 @@ def make_ui_path(p_str, outputs_dir_base: Path, agent_subdir: Path): "middle_steps": formatted_middle_steps, "type": "text_with_attachments", "next_steps": parsed_response_dict.get("next_steps_suggestion", []), - "cost": total_cost, + # "cost": total_cost, } self.messages.append(assistant_message) self.memory.append(f"Assistant: {str(assistant_message['content'])[:200]}...") @@ -928,7 +1016,7 @@ def make_ui_path(p_str, outputs_dir_base: Path, agent_subdir: Path): self.show_details_for_message(new_assistant_message_idx) def parse_response_content_for_nicegui(self, final_answer_json_str: str | dict ): - """Parses the JSON string from DSPy agent's final_answer field.""" + """Parses the JSON string from DSPy agent's answer field.""" if isinstance(final_answer_json_str, dict): # Already a dict # Ensure standard keys return { @@ -1081,10 +1169,31 @@ def format_raw_middle_steps_for_display(self, trajectory_data) -> str: async def on_page_load_actions(self, client: Client): logging.info(f"Client connected (User: {self.user_id}). 
Loading initial actions.") dataset_loaded = False + # if self.initial_dataset_path_from_arg and self.initial_dataset_path_from_arg.exists(): + # self.current_dataset_file_path = self.initial_dataset_path_from_arg + # self.current_dataset_display_name = self.current_dataset_file_path.name + # self.current_input_data_type = self.cli_args.input_data_type + # ui.notify(f"Loading dataset from arg: {self.current_dataset_display_name}", type='info', timeout=2000) + # await self.preview_loaded_or_uploaded_dataset() + # dataset_loaded = True + # if self.initial_dataset_path_from_arg and self.initial_dataset_path_from_arg.exists(): + # self.current_dataset_file_path = self.initial_dataset_path_from_arg + + # # Apply the same logic here to check for the custom file_name + # if self.cli_args.file_name: + # self.current_dataset_display_name = self.cli_args.file_name + # else: + # self.current_dataset_display_name = self.current_dataset_file_path.name + + # self.current_input_data_type = self.cli_args.input_data_type + if self.initial_dataset_path_from_arg and self.initial_dataset_path_from_arg.exists(): - self.current_dataset_file_path = self.initial_dataset_path_from_arg - self.current_dataset_display_name = self.current_dataset_file_path.name - self.current_input_data_type = self.cli_args.input_data_type + display_name = self.cli_args.file_name or self.initial_dataset_path_from_arg.name + self.current_dataset = { + "path": self.initial_dataset_path_from_arg, + "display_name": display_name, + "type": self.cli_args.input_data_type, + } ui.notify(f"Loading dataset from arg: {self.current_dataset_display_name}", type='info', timeout=2000) await self.preview_loaded_or_uploaded_dataset() dataset_loaded = True @@ -1239,7 +1348,7 @@ async def handle_upload(self, e: UploadEventArguments): return uploaded_filename = e.name # Determine file type - use suffix or allow user to specify later - self.current_input_data_type = Path(uploaded_filename).suffix.lower().replace('.', '') + # 
self.current_input_data_type = Path(uploaded_filename).suffix.lower().replace('.', '') if not self.current_input_data_type: # Fallback if no suffix self.current_input_data_type = "csv" # Or ask user ui.notify(f"Could not determine file type for {uploaded_filename}, assuming CSV. You can change this if needed.", type='warning') @@ -1250,8 +1359,14 @@ async def handle_upload(self, e: UploadEventArguments): with open(temp_file_path, 'wb') as f: f.write(e.content.read()) - self.current_dataset_file_path = temp_file_path - self.current_dataset_display_name = uploaded_filename + # self.current_dataset_file_path = temp_file_path + # self.current_dataset_display_name = uploaded_filename + + self.current_dataset = { + "path": temp_file_path, + "display_name": uploaded_filename, + "type": Path(uploaded_filename).suffix.lower().replace('.', '') + } # Update PythonCodeTool's dataset path if agent is already initialized if self.dspy_agent and hasattr(self.dspy_agent, 'react_agent') and self.dspy_agent.react_agent.tools: @@ -1312,8 +1427,9 @@ async def preview_loaded_or_uploaded_dataset(self): self.dataset_preview_area.clear() with self.dataset_preview_area: - ui.label(f"Active: {self.current_dataset_display_name} ({self.current_input_data_type.upper()})").classes('text-md font-semibold mb-1') + # ui.label(f"Active: {self.current_dataset_display_name} ({self.current_input_data_type.upper()})").classes('text-md font-semibold mb-1') + ui.label(f"Active: {self.current_dataset['display_name']} ({self.current_dataset['type'].upper()})").classes('text-md font-semibold mb-1') self.current_data_object = self.load_data_object_from_path(self.current_dataset_file_path, self.current_input_data_type) if self.current_data_object is None: @@ -1629,9 +1745,9 @@ def update_details_pane(self): ui.label(f_path.name).classes("font-semibold text-sm text-gray-800") ui.button(icon="download", on_click=lambda current_path=str(f_path): ui.download(current_path, filename=Path(current_path).name)) \ 
.props("flat dense size=sm color=primary round").tooltip("Download File") - if source_data.get("cost") is not None: - ui.markdown(f"**API cost for this query:** ${source_data['cost']:.4f}") \ - .classes("mt-3 text-sm text-gray-600") + # if source_data.get("cost") is not None: + # ui.markdown(f"**API cost for this query:** ${source_data['cost']:.4f}") \ + # .classes("mt-3 text-sm text-gray-600") # --- Fallback message if no content to display --- if not plots_to_display and not files_to_display and \ @@ -1771,6 +1887,7 @@ def _handle_drawer_escape_key(self, e): parser.add_argument("--input_type", dest="input_data_type", default="csv", help="Type of the initial dataset file (e.g., csv, tsv, h5ad).") parser.add_argument("--dspy_examples", dest="dspy_examples_path", default=str(DEFAULT_DSPY_EXAMPLES_FILE), help="Path to DSPy training examples JSON file.") parser.add_argument("--compile_dspy", dest="compile_dspy_agent", action=argparse.BooleanOptionalAction, default=True, help="Enable/disable DSPy agent compilation on startup.") + parser.add_argument("--file_name", dest="file_name", default=None, help="Optional file name to use for the initial dataset (if provided).") cli_args = parser.parse_args() diff --git a/dspy_agent/requirements_nicegui_dspy.txt b/dspy_agent/requirements_nicegui_dspy.txt index ff65894..6545cf4 100644 --- a/dspy_agent/requirements_nicegui_dspy.txt +++ b/dspy_agent/requirements_nicegui_dspy.txt @@ -1,9 +1,10 @@ # Core Application & Frameworks nicegui -dspy-ai==2.4.3 +dspy-ai==2.6.24 python-dotenv cloudpickle psycopg2-binary +structlog # Core Data Science & Plotting pandas @@ -29,6 +30,5 @@ catboost # Bioinformatics anndata biopython -PyVCF pysam gffutils \ No newline at end of file diff --git a/dspy_agent/static/agent.png b/dspy_agent/static/agent.png new file mode 100644 index 0000000000000000000000000000000000000000..f1609f48e4930325b7b41e324bebd0e778082d53 GIT binary patch literal 12842 
zcmd6OXH-*7_wPwU2uKrY(pxA}l_nijN|A1e#fhfCR05Ht?`+;ot1qgtJoFTflAr=81A>p^~xdY+h;WFO- zzQJy{0^MZ-?s?{It8)Q>0D#uHW*L#cH5d7e*XlWCH^5U+(iafCdy7uswL}?_<{TsS zEuQ!!wS?3pRSDIaHSx=N$~EHhHLWj-Fsb4);==d;!6e+hiBEG}{5FtuHLf>=7DuhJi9%Qzt}I%pTf}Mv9ssXbYeo{ODTiVSmP( zpRD(NlxT|XBhD^55)57g=>`5`i&Tpf5o5KeBU z@dFibfrrWH8gAUS0s9I2II%~WGjjRnVkP1IGbEbfGIIp)PV|!Ob-kDnSWd>T#6%R% z$MeP_lC)U2mYjNXIO4oA+Y~F>d>~J^efaF+F2}`U`6b##dQws{j=o0F`bHI*-(dQ10NY$sI zy)HaYJan$E%Y5oa;a<>zD=>#by!ITXb^f8=xISMmDXrWfp!&$Zl z5iU9M3!VNGa1qugL~>-2VUk9}Rp?s_C@h_~8d)YGkA=`&O?V7HOZ@e#PsVzG6B5e^ z2KQ84t;VPp3sIk5xBJXYNh7SAPP_jJ39^St`JK#!kklzoge&YoCZ?P-xT^9SQ4AxS zPR(HO^cLhr#TO>!%7aqs&kRz?YQG+!mEYDVin^=SR7%JgtInPWk;e=>#Q#d!!tkPd z?s?++y}exHq4U?HvP7)z!!Z@N&#iNuX=Q8!;tUa;^=#qkGQSR?hZ%YWHt(qkZF`;> zN%x(wv6wEbXYVVkOTO4q;0#(tGot$%FcaxaXjF=}SSWQy%+fm=woi>B`~kj1qY7;Y z+&OF)whQA4izdygQHMhVU@{CNH*o(N2|H2plw!%5!~pCkj4(E(zgfF5>(|aYImKcmVgq;}%i$=AlG@7E! zw?Z>S4efM~-?t34J`M?Aw$1%b7hhEZ)lc_`?R-ihHw@X{;5u8Px{mYlMJ&oKnYOz3 zDH0!J=8~pe|7dl<_(fAn(oPFLBpIRttWoCGow$7;`+aotTI=)uAH&IUnri9WLc=1b zN3yEfK&`C51=>6xO`J^Bs%Pg6hn1$sJ(c*(M}ThfEgdEcEq@@m>El#M%|xr-LZjpa zD`F(^3Z<$!sXwEiKHoSuRyHdBl2DD{+*Udv^`7{FZ5x@9)UYa@Jf|)(Rzf)3TxIDw zd!ooE$fks|jC3Mk1AP;Z<@Osi_M9q zY2Hdzvd-`qB-`yjy4mW4kzd;!fN`XYn&AirUm&fCl*|>c+sr$)%AT<-cj3p}_#?f+ z6ymRk0c>clSE~Tih4_0s-p6bdKqh)d=MyTef}OdCKUIMpdc@I#Mta|IhN@pXMh78x zsk|Sl1eE%ydZ(`z_mXpj6JHUonjMA%PBoVGI;RpL@t?Pw)%Y*gNaYTA|Ll)lIxp^UUJJ;zo+*$X%HuuOMlF?E7ldZ z%%N;ttLM+OXwysX+3HUV{H~1_$u+S5tn}y3q{%EJkJE^U*#n+ytDSQ{)kx2%(GuhR z#BaTYE02-~P;`ApQl9c9$f-J=t;AiB#9RSW{$@ZIp+g$%t8hQ&j067`X z4=alH(3$6@6HN#=?m~t{ooGC8*-G0&;Ko!4eEnEK@h7ah^+6+R^`jQS9Zr&_oo+Z( zRzP}G8vS6Jsx_P9A_MEYuz2t-w2NI7KtT8rGD5@LTGLxy>H9LrTCmZ8*kGRxJBnsQurP_*AH04-fvh{cgRxeA=*kooO71t;W`vB$X#}TuO`mt2 z2!pG{I`7h2rsbI}EdPlGp79PH>G*(In8YtsUAc(bJ4iuj>g85y{h_S$=X~AB!Tp&6 zC?dOJrKh_z-l9qxa~{Na=!e9!)NELx&o@G;T9&1_ZoK}<@@-PqJdVqsXCi-gMS`bm z)`eroq=2i@=2Zl11cFg&J0QB^x9yf=XQim(d}*Q<=RsNn_L6&8J8TQp9?N|)N6(Sd zbZ^2!6yhRs+Of`Gn)AgQJ@Jci$@F@x8+_-xdGze_?`Csj-(Y>q#195vF5tJSi3UqO 
zh2257%Qdns(ZML#W|66OS3M0S?=SH@y2&$-Tk3PhIU|j^J=C7GPP|rEeYQeVJ>gLV z`-J_MO&OAoVpKa*dWl-{D(xu^uPosWP{wT$x_R{Zmv7~5MI66MCX z9tsZMcWpB{enDuFa=*ox_pV-3e~3Ef2Q{imi48pYGmQiCZ*U2`DpjMg)jdA6u&hE3 zBg;zjjJQ(xPfqsxLzVS0mJSQTr^rv-g}pZY)N)T1!lU;OEkb5tMzVow6S^D|Q7o&Q z_c|mm{fq7`bc|Zg!W~V&*SrBTvyLetZdqRC7d<^%PYHiIYlG&r`j&m~>dyRM5)bo~ zFDHXZY|3q)%uX|Wo*dbx1gxN%>e#$7ru%4j-kofaYF*Rx_|P&erlzji$~52=S8c3j z!Vh&6s529`i=NUu{eNjh&4weFlT#lD*?s-m`DkBK{ZB`(zP*@^IpWWSU9k*BqU7MV z7Xy#Uarje7Mp>`Ci7%4n{W?yCvK_^^q^pl9tBF4_lR0x^xfH5hu_MRb-TTdUb6r@< z0AxGyQX5?wuy{jYgnin|E$KM)Z<6vzZ&RsO_<@gydk|Kh$400mMCTP8nuiH%h zJY>}y@7cCg0kW{@d`uy43gx%lsOUVr5dWIuwcb@#=#Y3ef3I%~?$;oy@{mH_ntOF< zPAc2djha;biQHW$McbL~C2GA{P0Bkr?P0v9a=WL6jqtD+X*Gmgq`qx>eigP1j>g!@ z;P|16jidcLcHe>^Vp`(Kwm-BaYwjjmnC0Zh;yKciC>Or=!e41!q`mSK$5@vJl9gXI zSPCL=qBDuAg0sd^d)mc^9!s)AZA&SjG$qsdQ}E)?UpG?CMT`WbmpMb_Z0R?xqhx;s za4#+V9tqtIO!`tkgAdu%jZM3X^Wb{Qy0+Ylr?UQ9B&hqxWl8p<`#|4AKWScr&8#=w zTf*?^l&PC%0kS>GDfh&8Io4h;>w>k{?AxzybU4y(-i+fAIk?K$n&0~1tm9jFeMa1k zW8L;DiWPPSZTed@Xkp!5`C59`oLZvVS}md^d!bKny3j`oY7tUJ{YyGfZT(Uv);HnE zf5G}MqiL>){AUkhnJ-uF^C`_B5_()Yc*&9>bmcwx1a4N!#cnv#eNXx}<9;a7P;S3= zy&aV)q3hC^oJ6;y6vU{1{!n#iQf}YWr*rlEVf&Of=bzF$H(X_QImq?>->SRC)8d7{ zM?2GP=DdtIcMFB|hcUQw%ZdpuJ*nhX#sVSvYj;{3QJJds8pZ zlp)qhe&6&*9#y{KabFB!_r!X85Ztb`x4RGBzzU!IOGM~x#tjw&GX7DpdinX}?m8xp z57sBvx|ex zErzJFGtRWT`KJ_aoQ-$Oko-Qs>vR1x044FBD?GFQ;6ZRxK0Mt)u3NdI;S|~1CY$j& z#lc)p8Bm{y^Ln!n&pb#W9~l{|YgBqSxjk3b@-4$>GQZD`!~*eJ`l|{aPDDC13?3{+ z-@v7eh`0vIa$ZZ|?6;dpom1)vI`miQPyWv2K#y9JEH~9D-`59vx?gxGlD|iVT}H-K z%4@NhqwBz+x2pj_kSM9Pp~T++Tl4&hH52PNdYl34op%NgS( z_;+VN7C1Xq3&R10`~*(jNnF2t`Rz`anK4=HtoH0Mvn1%LrHdpcQ@%xouk>;D^lQ%F zt1&%L&!9>e87Sjru1u1Fd4N(*ukvCxBE>FCyG73tHaf-J@^DN009l0hsLcp&GBi~% z9wZ83!nMeq>17vOLXZJ3WVh~o@bGmqu~+ zrHVW%Ie4Xg>sPV*ocVcvh*wqI_3phfgBTvpK{cS>na*frYJOF&S)_a~M%2wA6Jz6f z*Yv~|=pAg^8})sNWcYq=aq!7jmxPza%l`Q_##$?37WkGbfTLk^qjv+?OEn1V>toW{e)ST!5 zYR3QPi2MuXZupC$AyKl5&tp(Zmw8CSF4xcD`v+=^BSwDcipOf01y7$!x(N4@=JCgH 
z?I-yyo!tfX(&&SXz1gpBfa1T+D;-V0Xx5z^6d$AkmW20&OZS5{sHa3tTgE@A0l$X@ z7q-s$p_Ot?BIVB3&{cPknX;UPgHJv1jx54CGZ?yKP*Iw{ny$h*gL|C^NC}QgH$Ajn zR7?T6H3ku3D=MBEj7CGBq_G?cvy9YABjcajR-C>VnxBNg&N9oZQ!uL@K8CiyO7`Bb zvo?^4rHjg_h$i*BcTj;^N43KfBLCmM=WBrN598J&{OA-J_w9$v0UjIshH84u~2w5BACa3h&fEF$>rJc~7k7oG#nb zCi0QA=Ux$CTdPB*0DpwP?+Ll2?R-SQqp)_=>LA(Et8!Ng_^E=lP%{Pg-5yFHigC zBP|fQ9chd@kRaexp*k(z)K{z^yTPsH9D-`uv@Bg?Q5fzef5g0gFNcNj*;;VRZGfutj0gJ%vEA7Ly_M7HK%#pj#s*-C^NpruC+&ag{hXmSag6K3YMa6sQ!}y}( z^Z`wvXG!y~_v0pRc0g7@ty|VA zD_ZIX%3hM6oZ z?z-Z_~hDVuInolIjS16gH0YF2YY zBjY9$*`vo&-i#q=w?nQ)!!h)J<_|{g3B*;y2T{*{BHmZMw5R(XlSP48KDs$Tp_#an z7_xn-W0Wp9v@Eeg^olyVGVZq_M-|-^l5gLRb2#1~+=^ouT*-+5w zMP%p9ng~Z0$&9A1T&k=1c^uD8$|~8mFjr~EG@u6T*;XRxKf*WDU*h$C-f=2$44z%h z&50IYMsu7E7qeve_|uUpo2+bQj%tjyh6cw?mTiG7PwSE}_B%eF;-wJTB$Mkt2^>JF zju>P%uN-gpeA=6b4)gObY0ZPc%cD?D*8B(K3Sulh_Vf3Rwpwp{p~3Z(m$Xo$lWUSs zmsvD&z3s|q&SEUU8=V1b>w$~vzx${s3@(`1z_m7k8{7|m zU*>ye-pEPRf;abo?>FHlTVSml%=In()=F77;x*?^>WgL(jkykt@q_7+U7CBs6>S3M z0}X( z)Buq>us0_%u*tNEZ!<=3eaSfV6&Rw3I&f+hz92pXPYWjXI*a`E3;{O+;1ak zUs&00Nrosf3P(AKEu-YURw4q;GWGni%Z-k_J{JanEMc3bc2x^Cv#oD&vce|@M+$Z7 z${Z7ua*GyV;P%^_HhPiO`gREmH4`e!|2uQ(+koZRbK=)tnIwWoTUtoQ33*M2`sM(X zsQ=#?seuYi@?zGB*yqz+iJ!3HndHdQabIcg>s6+@Vse<(4DwCen&5P}HZ7<)8E&3i zqWFLADN{XrR$d(zOGohvx!}{8i6!fZjg3+h+>{ep;7)Oam{qMpIB=dz>G>?vLnBk41H0jly!d~{M+-4zU9mC!5vYLv z*9Pg!9LiER97qZK&Xf*x3)Sk))9Cs%k9E$FjQRSsxR$oZAw^$a&2AeME9Et&>G6wc z5bR`w?Hq4wMJ*3%#scfBIr30mg+jkBt2GJ|2Yg7cKIX1lZ2fZ;`s`~R@nQ$Vpb4TlyL^!~% zFDB4bE^*{zm2@c14t-k(M5xLh|EL`N*1DV&SKZ+Ym0;$mMWM4x=T!CC-WA5^Tz8KCT^Be|-3Vv^zV4{PwR9VIbqtX(% zUtWR}rAU?h-5iw?xim(JCPKxR2%3gg@^H$f_1v4$#^OLfWPw_VY2?O6)72f?MTtd0 zS2pjWViBS-F$W`DHZWH~OX!YC9B&aJbk_(SoJf$Gr_>~_l@|`9(_k@bRn(e#r=hK@ z?q0%^_l5aKKIViKF&aX&G8I1E>4@#dj}$!HpmEjQIy}7X+LLaS94fOrV92ySHGC`R zQMcpsmno+~_cFN4lQ!ZIAk2}qh8&M@B9;@9zt#&=$R3&J9?`8=J#1++26#8JuVmP=um7id{KP6%TZR>dys z&#x^A?|FPqHuavi(C&~$Vz%pvU%{s>uil>t=SEYUhfikFCY*-_ufw@v4|FQ3LKNo_ zSWc1HU-4P{ZEm}TBC*>gM1;YgyX8&+i6$@TL#2RbxHAv_AFgUT}o8FCAvA 
zI}XYAZ85iTtcEAJ)TX@0r)0W|!K?-9%2gNt_e^@O=w0k&{G&E^9dPvtxu9F&00}&A za$FoS`tD=@XU=Ian@@i#J#uN(WmOY}CaQsmxDeHq)5=DU0!KK3us`!!a2pFH(>1A=Sw4>+f7pS?^8 z`?eO4szlJM2hx+mZ5}CYk9^eYxVL{*pWD0`#Jc*9*y@^Bl;WI!uBP?M8k`=75^D|p zZ}{-g0p$^C(3~8~?+@68EXr@;Q0BU#-q9W&Tg#g`iX_oirH|CW$FTiBWI^A8T-kQGF$%#xjv#lCR38XC9YTi))uq?OdOQRIbt4G z1Rm#clMgnp9p&tj)kmD6J61=>f`WZ(V~T*+y?!xzbaAKQzGfwT4^-3nqh~eLnQ_+Q z<0=}w_<;x_8!;L4xPOP!7n^1_>~MAxdqJ7mR{qhbEfo*C4R+~OKL|z+hKmFW1b!7i zoyo(##hS9^-fZip?tGSYgmR+UWOB6j(!+grg9v;J(%K0p(xEuoUh>iH1MvbH3~)5w_~dGh`>v>*)tIs^tD%NAN-)v(dG^8KO3J>MtC*|k(sFzgNy=(Fu#$G6aevOxttewKF!XfhQmi zgvLGk=^6t4j~0jjr?y?}zgpL-`k$63ptLxEvan)X4aZ1F!S9xjN%4EB+s3 zd6++tts{1m(9Q+`J*g(zI7_u2a8r7Lo3fi0Jos-4TYbrP8ly$|RSb69KXq%7>1M%e zk8Rijvy6`5A(LY1zLb3FZ+$K(*oO_!Yg&<8Y+geqSr0&gMeZ@$ag>#b5I1Pw{%d?v z=Twy?Rk3p8O#lTlLTi>~HAdTL`HdjKQ2N(4rOuhO0LtL*`%7TUkk5tJF}D0>)4TuI zve^OS9|p?So2vvBW)uN{x$B4YRv@UTLV5pAc+Gsvp6&T(G}V$A=Sie-7(sbR=&kpf-hQnq749AsVKKY zF7Vb!h-K^&O~564JN2}&*819C(L$-$xy&6xWdT6bs6C~4tbC9cz|8%%?l)h>HX020 zm$t$GIzR4t=keOqm6&C8u(Hj!lUbA3&lC`0Y;hBt?0D^K&2X6nlcL+8Zu8@HARhW+ zA78UaNU0_=Ht|I8)(NR*yjFY(U=$wjRieb^O7`2teK8M-VPlDAg;3WEJDY1r2@mrS zD7<`7^A9lbytgv53fxic9^=j|e31P=pohf6s?0@K#NwqQpbGK6h|m-JcrPFX%3Fp| zW~QJ+gxVC!0f54{BPa#c^SK62NHpM}FDA%v+qXiVnh15h6NQ?5WIsZ=`m6-hib6pR z#A)5DS+Bja3^H1h-GaRyfzTBS-@YV>u?N59GgE6h{o}HlM-Iu zcIG}StnV-W78b63m?pSTSMI$Bn0dLLFE#y^n)^lB3y5_K(q2;|Yk2!Ni!=l4Wv@D^Op zNC9^cXT(>VZ~MH$W|LQdkMj+!`SrLFZo0*bn1kX^{lKP4?hVbTj)Ro8SHB_x+xh{@ zlO9)asAU=RYS12_0AVEE)}<&Cf9e`%u;F3_Ug`-A&U7*7DHj8!>H-2~#%d5F2zEU& zx$BNNvIg zs!J`m&elZz)s0fsfK|^QDFnfj3<`_$S;YCN)*%zm%gKM(VfeT)a+BZ=ushQbuQT%L z7DPbt9U?*ND+8;GOv9(=!$o_^*mlSLqd#1`GoddcT%P0k9>Fo=0uGt|0y>5J)Jc<1 zr^w+MSs=Dk)Z$Z^f zj+^3Twc@Kh}avsb!^a@{#7JM)4h%Fudcyrq%(kTFp;*MWl zF8@3YoGd$N+jw`?7T9IE&?~GLEr4OcJjzLAU27WUa7h|a&DsOJ8b3qM;4yNl8Q1c) zqrxe|Zz5M-1Iop09k}lopTUecmse{9hJi@Uz=4Zel`JP>-h=BEsw|%WX0p0kETWg< z;S6kK+3LfJj&SClGe$O3ATR6l(XU8^wojRSf)+C)+Z>VG;78ZShH{1cyL)vMNAE6v 
zPH@C8!wBuQRKNP|7pvXj0z7LlRWq1zWB$~=MP`@0W*)@`oW|`PFJStoTIx<)Wf-PMx~a>|*_vFN3L z;26T$p6`-rc5w=vDCzxK6&iXqwJ|F|OydWcUrRQok`?q#NM~QL8>YK{X$8xpKZXU0?H-vj?{43|Y6?bR zkQ&1gu6(;1cAWU=I3rjRknjK%!P6J8w9o z6oU9Fd>_SvT6oiwuJ=x#%%MAQ!PWXEccl*KSW<`Z?23`(meC}sdt^ib%t+d!!5gr^ z!Z1wf&5@;P@|-fxHbWkgc)0fCpVlS!K;Q;E2cGYz>*Xy#X6OIx-6Uz(ydt=S8iL^e z`Xv_ZI^kfewB@$E>s8mVAER4dBPu^S2-nK+u2;U9R-XtjPT^*}@=JJ(k`#Gc&rQjL zS9aKtn6X8dlxhZOzC+3?m}G8hNWq;=xR?aI15I(?rPi{NdsLsnG56Y7l(8Ynj-)?P zn5#gGu2_*r9bjg{k+_MG52U&<2gopM7(ROC#B!!s3+64|p zJE~1EAyB!?LjM8@V7SVR?Eol+l6&2QC)%uGVo)*2+?%#6_hh53@vnY~gY|&f)7a+< z_EEP&gh(_R01tJHv~t?(;dkRBOGaA?NnqdG=UiPDabVv{PK@e^R`6h(Sb8m3`5*)s`_1_TuR)9Lnsn$8Vpb3&?J~m!&NVkrT66;#KUcUYEO1 ze0np9c0@>-Q?2M|c(7*;Bibi51vMI0D|!mO)7xuBoWU@Rq42r|6K^k*oM&XP|os^stU+BaSn3`>7lkhAOA+KpIs6a@TfCK!RDWSfBv7b z@$s4ErYYlWc#P#O3$$-LKPd@@d7nY+Z7YeUvbjZq8b z;1UZo1;AY6U0q1EFS_jOZ{9Iy_@(T9zu-49g?K%50kg$@cZn z_GcrnNp8dWQc0%)$;8LXdtbGSc<9imA< z34|$wJb7mC+5v*ENc02Mr=6pxvO^?EkWrZ*_SK3s;GF%+6J%tjB|z>wyXJa3y8w+N zKzQ}+aVnmWK>O5G@D*ze1ryi$;zhhGZlQ6kD_ZZOcgc`$+<#hXAHgc$NAJ2kraMF7 zSUtD`xqru>oGIw9f&vh`4NjBenF3uwbR0)G=)yfqK7*^Mg2VdU!o8#XTlRF0sLA;U zeE;*Dan@+4HlE`o19^!UL6W8kCIh8|4rv9R8@)^imSM^%cv}M!1Ds zkY0dTgC}Nl7eZOMY)c>Wg@}9cOQd|+&tO)z^|apZeXLcGGo(ty-$kYVSjND-%MS1& zqzaxH#m6q&lcr#3(nVVkvrYx9&$pyi!3&(CWJSs$Qowjk4wxx5r?2^+0zt4K)rKW{ zrU%iQqp%7FMF4hyYeX%PcW3fO?k+juwVDLl_%;aWhd@d%^tkM&f9Fm|vVj+0*mLUR z1WIfXqLulXqkp(ac_hecL57g*zkdqmEmQrHY3aa?8?gf$KQ1m_Svq_;A7Rrr_muGc zHE^DA>7^;+GnnGH-_K;QRgYB6hhu&LHNvMyWbQrm@_8g$q#p39NhIng;=csEu!U~g z-|i7X+X4y`SRzAj{OXAtuQ$(qWceL-VE8qBhiysh7pK&`gY%>aI@S(Qc&oD*Yw7z> z{2Ar8cUKOwdI0)jz8^9G4Zm=9!pNpIAQC5XNsp{=b@{bNJ(Fm~YJ zwP4_d9nYnn(Xgc`lBPI%a0vQ>4-DlZyRLJbXf$(Q&7>AFz(H++kQ!;U;lXJJh3roV z;<(&EF&0AmrLaZW4b_}Kt74*;b6d`D;pX>btPU)Hs>svJdH?F=QVr;$A*Yl3K*xA*uu4c;V}}vBWa_zqAL9^vVoV2-#CE z3@A^za}s5x){Mh)r+W%5H$O2I9GN zU`qLQ`)oU(JU6g?LkO&gN|SboZbX+COe?{uzRe_lA+BOOK(oQ{w?WehErlzPu}%TR zSpH6|)#&v_^+hSiA{$WS(g{9b@^W*$=8mMR-(A%TH89ANGzOccanxyF9EE>==<%P( 
z93f^~*l8`^uzg4NiLt)12N(+cW=jpGARFhFdCKhMe0)LxuhvNKyQk;CY<##s{ClSV z*20Y_PnQZL=yc0h3wQCAELB6ZTTV}@yf4o$L6%0rtku`Ymkj<#{-*+XxIs_i1N%g> Uo07}HH~|2lbxm|?uDjm2|U!@_y1>R7_yUnH`>VBAjVn-gHX1zgi>SAl(A$T^^|>?l2B3%Ws9_nEZgjy8gPvV0H(3EJYY z&JY9#AK?%$68u?;8D0l}xFU|)y7Gd5@x13!!S`JRoJR!c|AF%#cJ#oV81PVrXmyI{ z5{@TEpSg4%ijI!f3c3&y;eUp3UMu`kz#X=UECeY-w%8-CF?Z*fms?M}-D2;|^ZsR! zC4wqG4@W8Aho>LS!wH|g+UUu*-IKA`^HJodvsd#^kNQPF&`M3e;>lILhj`XQhmF~MKclSITos+G;Y(CJSbhBV~>(cT>NT%rr&xj(bD(V{^ z{}Xo8`tmWZ_sI9MOVV>I(12j9$@t&%QgD9j*w&4qtyQB`+P&YImNp!?+R52oJRO3TnR&;E5B;| z_`0)ZFR7Q*K=PelKB}e)8rAX53YF7JR%1dZDL=j=(4>Zmt^R2p6ap;F5Pb)Fe$#c; zS4rN6QgQ*+$9MN9+sQhGj0fYMp@(9?i^v)&(1>%o`B=iRml}?BzWDukeF;3!}4-!mf zJ|>)gqkX9DJnb6o4Li}FC|#)y8YA${$V!$H&#WXEvF8HhsG@nOQ3I1XoFNO%$e`t% ziazfg7$ezGZYx5Ldr$ALKVXnOm6Ga8<9tS~^z(J|) zFSaNDg#1z@|2Tzh3t^qphaZyHhn~u!y)u@JOxHwy*TZtouCPDD90@*3_k>}1}?rjS2Z z!UC1iar+na{p^U6qPq?Yul_|XkyF5?mm}{Vp@(`gFrSCY8d7mHba?i~6>8~KoGww& z0#PvPiC>ms8Mtm*oeLaF9;eTu*WCRojrySByM+#jcHvN|Vk@lr7e`hJGr^H4GNS}q z@(LYv8ita6%RXzP=0=59n+l6B;Kx2wEA$Y@E%8wzK`HfpoUy){^2+~YOWE*{KRP3h ztKys0f>d)YIW2SEQ2akkrEDS8iw7={zTX>Nn$n41Qsiwz@zt_I1%JGxf_2+N8B#jB zp^u~l->?c0C3m&LV@jyz;tE&{XyHRQSSF8i-Z?wRGjT8Hi1UeHNc}av>&~cf^6zBZ z!OK_*c^_;FeC<2dn|3nS5$2j%_ySe{3l#+`+Cx<`>|q9gsl5+E-bc|xYYG!~wF*fj ziK^Y!B>kk%+T#rjFqw_kXh^~jKMqYnby;Am4y8}RZQ3@NgGbJtD}o&`LT`u=_`yCo z3)XR8c+;(=rrQVQx%wpjwxPXzc>;^YM2aprvA53=MLddSNn(c@=1Ba>VWkbJsl=HJ zERpCAG$ys?*h9#!i=GW#INIrp{c%Q)RS_S0Q|tudxEQ{O>>0hkqW~E=v);x0ivQa4IVU6_BM`~TQMYHp@Y6--PYp^$QAivX0(!n z29Fdu3R)sR1D|!!~cX&BCbaf&j zk(k?d0`b-#-z-SjRev6cKe?z?*zhfLK3m9ne;e|C3Vk+`CJ#g$8yjTr=V;btm-7fV zEsDM)x&hZ?dbj5cOdm4!!*3vN!@dA9O&uPIVLgqf-_ojtP2VU~5ZDwr zbr_S8ibK@!Z)zyTmGcKfZGQN96=ua#ch0a#5B`F}oRC zEzap=0OATd4PQv;w*yYtJq&49X8f^ygGy4<8@d_du6(g27VAT}WM*J5*C=vajy@Yg z>n@8(O?{o)&SsaS3Z0NBf(4S(JAe)+m?%}3a|c77xexO>%j%i`f>C%YTMU=BYH!6( zfhSepVxrCWVz7mkFzF-hFTQ3@%cD>Xvj49H21sW0x^ZEo4Sk)Ar-89^IoZY@qLpD#uF zxw1BR1f{<&c^K`*(<%-0lDuzJtkv)X zsVg`X^hR#(ulE8h7gd|MKuh}W`4*QI29dN8HrNyqQL>IrZtxLR{Y6;_Z7geu#-yaC 
z=Ka0F+^U%tu?q19uWvxNOWn4y*gYP6&qtYcnL^GKYkdgcY^ozyz!3qf~JmvVR~w+YwvHG zBHL^~2G=p%40|oL#e|1Sa%Q4FqvAK!<|BTp*9A;)pU!T;3(=VgJdN({68tb<5CSGY zmdFbzKcAr#IU6?_bNk_KpegGqzzEOF52*v0LWA~A!C)|B##!?(9OHwl)!3&gMhqb4 z`c(F*LBLb+P6dLMy7bYI^|yCO@B;%tR@Lv$va;nI6gjd%WVb;t;#Qq zafmVu>-N68-%~XU29HA!6zZ)xdl^JIPrEab5Jk*&bbShj`$d~i*8G=r8B_TA7*ooM zC+MWxhr?zP5=mq?O&FV-4QC_2Bb?wF)-h0~(kES5!LhcjATPgze#J+(L$Fn6_?;bM zeTlhIZWoX=x)67NPQQxS^e%?L2(#wEay0DsM8air-w}Ejch` zc|`nSObV{Vgnv`M0OtLJJ-{CfRL>gF!0rz%l zg?M@5cPIdXW^ScT>saYD2$EDuEAXK!Us9LGjy`eR^D^wAzj{hXW_tHv8=~dU{!ns|Wa`h*Pl4LCt-q0!F zYVyc9j;_EBcS$5wKDH+XvD;hHlYUNY$wp7!(c680RIIY9&sM*t(aspKYl!5F0OC$@ zkh)Q^9u7=B555^aGfYzfC(qbdnjFXZ$bFEl z$Rso8_%H2yRP$-J4YYCdBR~0&*B#e!yK+}SQsIl;1&YO%=eT;4>!3Q~%E8iCdZ!bG zeQaZ$?3#|;c&%PBC3{k~+z4pt&)jCcVzFYYbJ95+ucEH>>gyl$P+yijs}n5yx~}lZ zd*IyhBwy--s_;<12Lk$hxGb~c#!_l&tyJf%_RIO@Q~M(mT^{){>prwBYmaFV5pchf&2Hbtc6 zimhbBRqD&B<~~dOHh_&e=^g*XDH@*Y?6|xbm#7mYkcutBRi#}!7>`s=+DL+!E80)w z4edHRMi-CK1ZbrVxH3(6g`_FJh%SNvSKk(!kC8}v^c_BAW>--RUt;PyvwHbujNW{RJ(FBpF9V zxp`EU1ex~>uIZ`BH98pn92ox3#`alMbsX2oiI5HZgNw+cEigHo0J3##x*oqy$E3FF zumG(asDpHG@(N&Q_2Q|^!a=jC;$aDVMcmUHK!#ovEX^goXm8Jb{dU`QUQnAMiQa%O zj84llzzq=(Q=FU15dSY1zId|u2URFf+bv`*QC40#a5nGsPmay!+2%VoJ)V&&Fn5A+ zc_*dpDlW2cw1V;z<|7ZbV#=1am)-uzSNrX}3~2>iA3D8+68C!4yVBPp-{Cvjn7{P8 zc2u*B$fZl)9=F~52vmPJj(;a}e)MK~=7lz@uad&iqF3}N9=z35uC)i&Q+oQ1kLj7Z z0O%u-SMi@mC6d%U6(gjqL92{Yjui689|2kksWx``FH~7v7Ijh#yx&caLMBRqDSY#Z zo*(rTH@yRFYZA zGBx7P)!vSi4%;}``D~22k}afi7Jm{J2PV}OBRkS4C52cHMzV@IG?)79dye{_NN-fE z{2o<%Qjk=!Q;v(6*eU|2J9Js2KMgRMakFSVl4D?zS*UZ%f;%=nFduO#4uti9w|9eeud_*@=r_ z26x~*#NVG$VHjgDg9Ccj{u+w9o_jftI#o2$>=*zE z&3v9rZw%2T1QB`3lBext8&Fo#mQ`;Qwq7~PfBe+3d*+`%?Cp5a=>GLdYywdzC@(ws z(l|$`kg?55PjLK=><-tgt{U;6*vEZgGv|;j5#pZ?aeE$@O4%$AHHIvaAbbBN(&FTf zJy{ao0%jgGR7-hs&^fO;B*F{O3FDd_WM`xjKgCA-N-7+sC|suhz(8sY#b?$nHB1>0 z%?fE=3&*xSsC2DV&pNHfD5A(RpGL_+4mSj070q!<@9?i1JO> zYA=Q(qCRQE1KV~fnh24#f&}abJs-cD(}Ol5MX(gPXe%C(nHyd8;J(~|;NVuFp4SO!tcME5--b0zOqRF_D>HBJ& 
z;Z9iBd@9q2hGM0$K!OA9PJ@nlq9LpGVB2bipI1!*(q&gGhOc!!#a?!M{LNA}2{q~(*z4M?DmIISp z*3Em3dmptjRce_^?4`el-{Y9kKZf;W;S;kdcVHv3;BeA6`|qSFg0}hz`mgk%qV3y( z|D9i>N=Tb5q;^n2ubji2B3YpgD`9?*59M&emT-_1AZkn6MyNy z{5^E4wmAf8(Gz^_hxrBB8ylF!VH-*heRs?U34h&=73-wGRP zTh&0R#x}9Ja;tj$SKbX8VcmZnt@VurQSxI37Cgr*AD_J0RgWtaFhb}wt;(ay#?XM{ zDSA=mnH55tN70aT6eiWqEojzGM$iaxAIvo{8ttWu`3q;fSim&Jq`Vd(@>!_+qv~D2 z0u1J`vQ*T0bRHgIvB`O3e(GU-%B}w`MHb)6K0XOg#{3 zQT62!V7n&c)UBspK}ZWhQXX@heyqC6w(^_qdNa5xl%1-C#RU0@klkwpj%2(ZaOlYXK?T=7({h7Fn_&1zBqDMve=TseG`y&&C#mA z@su%=S5?5}M~kJoJOM{xR0%ai(!#ErzAX?c1A1Eh%sDc-UGCYe*MKOqOcluQrlfbc zzTpI?txB?ml%Mit*;dMeZ?;49V_qKcv%*=7UP)3@2@MH zD2B915b87-|FC4igAjO;YR3mOIPUO(q9{XrL9NaB{`O*AJYUhW)Cq+0q1S2ns0vAb z;0SuXqW5D@Ze$3&)rU~khO(^|y~qzXiQ3v0D{C17%d^scXMiBxFydWx`zUU$uF;Xw zarEG3T21gLX}=?&MJ!g8;Pp|`UHw2ubjyRLEvxHKfvUV-FhD-mY0xexT)tMvi%};t#EhxS(7r<^Uq zSjX03&z%c~uwx5nS%=UYPYY`m6s*P2UgPw30dMQr9PBwFr&SNRD!#yj9@oYk8~V1w z5~i$RUA9rRU_TfSZX<5r89jy`Hvw%0qu=ocyIb!r`K03FK>m+7Ltts4H_Y+Pcw#{t zxrUes7lV#Mb(>a^yKY6)}m7O6^BEU$mb{U?xZ4 zi?~H*J_^w^&{W{AXA8MV;hP%@PeXJNET>3G($X{VV4$Ub71}&~_BKbQ3#vdk0DPv+ za|lGc)sV1N9u@XIbdeI6fYol?n%9v`AVhJVjZypCdc{XS!bSh!772I&?l zLQ`R%0a~K*Ou=X}-axLkXbHWWyuHd#qq+6|2Bu4|Q;&5UAEl&&DEP!q66zQ4q5B}^ zZz*{pyG4W2FV~;t7)jj4W-c?Hi8}$@*}4sarIr$%bc5GBeGSQRw&wgGcJ-N#kZu(u z2(yYtOi)?N8VeB&I;|Wyg@k+e4mCWX-LaiqnnVVwohKW{wQV#Z1Zk4Aguyf{7par> zhf=XA%W?;vom{0Uz8P7+``;K;Ccofb6r!29vcV%udO{Ox%x}iYXrB9aowWkIHUmZv z$7u_8*IM0eX_@x=cs}B1>&EZzY;bwpfwXsJ?M2nbz*kKPhl6Yhr&9YjqlKg0q-{Z!YNNQ&wF%BgPyw+lvJ)CBVV|A9Dq`q2&v zPXljCkD~NyCkyEZVh4YhrQ-HW-e7+#=Gk=G-Wt+0yT_dmzgH3xwB4Y;IDZ>902|1C zY_n>-piEG$_eng)wVkXdx8SVD)OME8p7_%ej8I1X%7+P)`8F-cJH4z)>c&me_!rmK z26fSQXz6%AmhS4xohPL)MaYJh29U5gxF`(fBa+}3RBEG>4lV?+jzs&)%G8{Grm|+4 zShFwu)^nqULkuRY4i<&<$<{KHRAV3z9d89lINf}5f1j}UhF5LyUkCpO3xI1`(G+rU z%bj>j@6D;^*`A))<3>S^7YP7HQzMT8(jFj_bmorE%Us{LAiV}*N} zM@ad!i!y4?L*Uqg_o{Co1a7_7tUpIyVynIyu|7~rlM1QAm)TzHujoFu!CXSh%omOB zp(wIl>lRNNl~Dz-$Qsu#Cr!EQg%_$z 
zcco*3FYLYkyKfCscsr>_wo(x=20+aRTe_lyDjOp0t(Koto;NI1`kEgUD{ZK)C$EN@MIj1g--Trj~7#~0Pc08>ofW#8Ebv$oP!cuoMu33cd#AR*w5_Pk%`_3Axe{B_+>FhCM|C8 zBf{Bi9ArDIw?7dCNu0}sq{uOf#BGF9^vZbI)l`9Qx(|1}!iYI01GoI&3&pJKZ{Oz+ zCM&U?1i2tY1_B)CvlClU(dN*|ZM`U~Ql^gUuN3mbDxv}jyUrhkiJW>#J(=cuM6tp6 zX4iu6l`pu4|J-IqHx*X7>a(Z=(vKq~)Qp|*(0Fdw(&SZDTQum9@|?r9_xlA$^f$!q z>@;**$F{y-OwC7 zgGGHCplMMC{%Lr5k>YdL^;L@gtQARnle=M(d?7$|=6xt@`hhz|EU4`{{HF&^4g~)+ zCqA?5WM;G*pPu{n&6H2IuSt59x1O5b@%rszfPXY2;QU2`d}aHIkm!SgGzn8)R9_@_ zJ(!jK%2GzAMxDZfre7R!_7*!6ljv#0t&`uFztF09?FRl7GG1skov#D8v3O6T1l|tS zh|1eko!6~@Ks7gK_{v5imCau^ney%JTi^}a+SaaK%^>a@K*kp~*3nc4FH@W!E7Y!9 z{v3A)@r0mvks#Eb=s*-9V_uh(7voAa$?lD=4PIdoQOnNy3+-g`3+C1*07r?uVeS%l z)6Bof?dBlpVDkdWy4DdGWvvS?t%_R@GZ6>xrMs%W$+00Tp+$#WdYsz;P`G$ZiC2`ZqF^Aus-PP{`Tg+@cx9Ta-%U22P%d` zIt_0qL(tgW(vR}1t<)rd9Y`g1(H)_7p;JW3nd22 z3#@2tF<_NR8E72-P!pA5PK@G%SkHxait{Fq`Xw>H?{^knYj|!x)1dih_=LlS4RMGA zEHJ@N3I3Hgbd?5Un;ixtE#()^LEcDC+9x*HsJ@We^5Q(vhTEj$?VsXJFSXhZSX||5 z#<0tF==?QFfvP({jn%f_G7rZv1&=G`+(_Rs1#F=tEe6DEGXi{mJow@{F{lPX=yCB? zCE{U=cbd>Lam5@+0r_mf=)vR=lI@@eu& z{F{=s1lw+?@^LRlbNlU^Bg|XJel!X_yv=0^AOe(V6ZSSsDh>3*e)arkT1=6)Xu$8 zBT3z(&pOfEP2-NvbRlRzF{7%USS}8}q61cev>|`>iB7s)cdzEd_*&w_?t%TqAcn_u z0x4ZgevV4Gs)EZNAE%H|5T)J+C2@DH%-WWp?zQdQxdAlMSr{Ls9F#@!Neaq)uL<{I zRR7+2`{gj2$GGA4fLfR_CovqWrsU$7+CNALSbn-{nG?X-Aa#QF*gnI*SD0G}&UC^2 z;->%p%-c(r__qZoovys12%;*sCXCKo9 zO_R>b^e51?*xNBg5z)%S=y8!&nUO;n`>V6x*@u_cZ8t6*u@S05-j}ww?cJAGx1~`V0TrK3? zvD;E2q@Sc0Ad3vf36ez1A-n~jt6}W3OfA}0!#&LJJr74iMhlf&v5B8{v2Y-DpO%|r zYI7soilOvyxq8)7>EC6W1f99RD;*Y<5AA zRe8myy5Etw;sw~8TBELX$1$`21j30NjGvTXKwmZY>hr%X?j0QCLpfh~n49g!cs7(V z*7IWi1N401C|-{I7{tn6^S|%?LRmu$zW)@9PdofE)bmmH-j1JF;KF0o< z2C|}U<4UPdeZ34s+Ej}2B-eRd>?H|vw)ECl|Mxt$-8w;iK~|DU2U=mT+jlKRUc4%6 z)W=K$85b<&^x8?tdNG?}?xs31fKd^yXXtf?wXN_Bw{4UlhU%v^e_H(=}! 
zWu@Lm1KL8n^odQCz<*s96!T!48;-Q-37=(gk`(|9!H-`)~>?9{cEuv z6*L5`nXmevsPjF$AJ|n~@mK#b+d(6{Y5xs=Tm9j?f9Qol*eTi_FUj+Yu`++mnAY*!R%_lEw^2 zBuW>TacNy#Imp@QC3~|jkSQwTN-PKg9bBM=zrTyFKmZQK6 z+sHf8Ca^*MYXGA0-?7nKgOf7=9Eq}P@d>+hT0YrP-SSt;jenIALp#mE&u~Yu-2QW% z;6&-6Jgg{?VHFQmtgq$3yWo2ZVbE*yTqo=lFX1t;WO}w{-;3pnRcZTsoHrj}(GY?@ ziveJl>?<0on(&L!Ksi>W)5#^?T59yMkQqHkCCjB_qO#E8ujw z?)+W`S`?l&9yD72D!_s5yzc%RT^h0UCc4LS^T9u0i}41@dI?W26Y~{gWn(-%B(I2w zrsbc!x`nRy;XD%=>1UQ?3LU!30E+NcY2wMCQS59ephT8f)crL(EpHkXT9{#;7mxHK z6&4H2%NCc}_*7m?!eZU?Px5S960;Tm2i;Mn2i<`uaRCh83?q`=ZUYcQ| z^M{hJ(GE@kS53K0?T;D>w#MU;a{h`_ju0^9XCn=*6HECzmDCR*fZ6*Gtfkyi80IpO z5X9L4Kt$hG?mGZ0m0O;Kcw(&hLiXYzus})3D)C$0_jk!JJ zScj`-e5b%9xm$o!o7$+QgboJ={mOaT2{(=HwW)09dMtl71b0bW*&OKyy&81%{>4zbRop_jVl$d>H)*oL2Lcb_y{k4&2PT;}a z+z_X7w{>xvA3jQjKmbqV=h+WAm^QF1VEMr`;{{BWV1_Nq1a!V6A+%Iah7@hW=_`B@sDjpVsW~8-U<)%$)+H+W-7Z2 z4#2fb|Mno~q)oYe{Ilcq_{(fqts_j}wU?a_cV5$5vt6@u><*4MCiaHzV~_E1O6@zkzNzFCgTg-y@TNjT~9@$-%eA?9?3`EV-?BCHtEehnH~*Xt6p3SaEw zsOE`9-FaDtZhcL5uRf(YIF8Q`;A4+?9%`+Tg!8;ls835R7}aBm^-#OaX-4csS7K^n zh#jI{3&7IsiTR=?+|QZJx_=F|Fuqx80qwUnt)bc~6LXVlzFUDZ2>^dIektycT*uP- z4_(q3Dgh`uFj5M%GZuyOQ{MaqLHPMJ&N)#VuW`o~(|qflGppWP zRkrMeAVi}2?uysJbRT#R^&2#-*npj#aW%VM?0~7bHOdna7MRzi4?&ktw%;s`$UG~s zV2a)#awZq`&Huy7fuv%i%1YxVgYPJsTVWy09G0Eh&$yzLiy3~Z6}$%p=YaUy`8Vr; zdKK(?-Ye(HD>u$Lkuj88kD@X_*MqiGk9y)o%To0U3LW-@{b+C^=1WdMv)|bErB|^* zM9v2P7ueb8r!}Mgt^;`ZqB3;458tNPhziD@|9`Qbx3(sZO@4zBlow(Ejag{oQTbIX z%CSY=8SAPOn@am@pN~y{76w5g!U{F{Fe0P0q0!HUg+a%8an=*B(UwgNRi|3f?)=v6 z_%#fjV{gl`YAq!4lf0EyD+?09c-}Ih7FWfS^HrikF8X zN4Edq@!I>CM_&YhiEDCa6IL=xujV;OU|RnEmb~ahTVx%m9oya7Nv|Rpm!-P8nsrSa zjZELU#42&!OsPQnNwc^8S(bxFWA#QepDj*HDWgnp=V*Q)XY0HDdJwO1TV-0Dc7Q%> z$oe`hAGWT$0EjCINI`X({fvr%whgH=%e-PoSfjEqrOsw5aOhz2oxa)oK#!eG*G4X)-yPicJ^3p< zOO_=0Y-9f;pWQm5t*_~hjM^(GMtXe4q%F;g;*Yx(Z_0rnW4PZ|q~8-FH6EKHv$v-> zL>z%L!;<&=08Ibe#-ux-W}(7_mJLj8@@YB$F0 z4#QRyDwkpgU@0hqP-VDn^VkH;&@M4*6DKo-oUQ(YwuU8QBi#k$XTxb56I)LKIk8uC 
zAt;eIRIIohVQ-tUIc>HhGP&yw5`qO=2|0y7A8_C9kw@)N*cg9rV!sjI(FzfKogNz{zcRD=JWa zfCV8gD=S)zg|wynz$VV&OF+aP1bHYc{}vJOwWj@$+_9p;)VE(ekRd4^YWP5|7&ZXB zzxIs6&lR2u7WaaD_}7|Zy(KYU%R~s^kCT8I(E@ZWFZ($n*gzDoDUe3-%cs$CC%}Y3 zT=m6kdOJV8Vytt=%&#atDIAn62!p~Xg6{D@bC;E1+bz@`PQaiMpx(GQc|dwCUyBLJ zgTQ6k)!{5Ww5}{X{DM?C=+V}wyD*85Dl80>QLx5Z2jIPmM$gldT(&-&?;f%5>@XjO zmeQ!&u#en##f!^%mU+GxjqZ;m-^Uq!%h)utZOabWnKm&ch%G#h*SKZ&R*eMzy4Rq? zUSCSU^PM-N$E$;5CDRIDkQ)yL-5U5Ue2OprEps4U$T=F+3~(v~rd(byo32NgE!c)> z?Gf7_{_(dPtA1L;^r?s;odd&WaQ)xnf*JA@X((B{ZDTj!Z_(d$1IP>^&tD@U|M28@ zSop;ic)vW-Ku+O1mN-2&p*Ddx`6!not=Z_ z2@*Tab*kQkHi;dJplTU#Il|a_4VI;^Qio4$hca!~Yu;6^mLNbFs3RsAg12s<*@@x{ zqW2vq3tHe>It@R6SEZsDd(m;lg{OtRE!H#Nb^fNMFt-jwC+))=+dU)BDZN16<>7ck z-SUP~t5>L#5(?IoU{Ha>il7<8__K#XgkdSvjk};;0+c{-Ek?r7$uol<>wZR+FV}8R z2eO1zouly0h=p+4r{ReFD9NCMz7A_V)n8aucQ=oW$N?X~UMW*0;p#Icz5j57LKz}0 zk6btxh@V2V+%!W+nwXUc%+`MP6$14sGT@9iEzE_b?1l=?@Cxzz$VSqj{DP|DPi zweIjo_>_SB9NW!T@rC;^l$To=#^cQ!y1Tf1Kubj*ei%3kYDI-}cIulvz;A4zCR=jq z%aor!#W?D^`E1{RF4TGeV_%}cJK={vSulDJ5Ks5R#}$oAP)@Ud_-ty!eI9=Hb^LFo z%z}g83vdM};E(4w>;@%%LaK~|APYg*vvB6e6Q7;Hdq_vZ$~?#ef2xE%0+FHXjiR>Jk3H`^ue z8?BrcM3Z>@QXSOpAZKknr+@b8O}YL&#OT8MzP-VEcOQBGEPYmxCcyC=xv43{Se6aD z{QxHObrn6-WWk9Nv|Z~zB+gD`9f&SbP{4XVMBcx$nEc(9@vN&-~q9 z(bpw-3X=cAmI90Ie*}AEh#nVMRhhZ#f0KzVk}&+hlmaDP>$(lL6p3{}-ptUk@gQmC z*@h*^tw#hy!OBjSiO{O7jxK1JvgH@(gAzZn2ar=_P!-jb+;XFRPKh@&%R&5t%EB&I z9A?Ws0lf4N-C*x&-K0Qp%Z$0wSo*n>c5CdCt0zcH!8aftSPCTf&p!1meo{{B#w#$+ z;bCYvw@`uWt`G?Mm1FNA6S+4t_0S7}1VuGbbe!(Yo$c}DgrKg!y=r4c4Bs=g!A`s= zN8YcXhr)q{Q29Z9iF=rjID7+-)OvqqZ(p9&2wxT9>Z8lH`fE%<`hP)4>hiWpe8J8g6 z3uU`RIl&@GA-})i^|{{V@o#I<6xVbGPnqjDOPKJ-%d8er&7~Bq*X;!@t`>|oQz8+d zUaC?=pG)GwmB}U#%EtP9x)7@KIO44Ze%X9+S+OjGT9Tw{UFtK{dSmrf zW&uz9-%OyLs7~kql;IKN38+5r_QsR)t7yW{``wytz4u!d-PVl}#)7fW1x~ZZvH(E- zn$yLVy z000BX@jn#a~wIMKtQJ#0ah7V!2X{QT3qjfnBh&6v=)byHJ2^D>ND?*X_yax1o#hvH>v z6I+9x{_yigRLNbF=_iBteVvXzywP(`Wxi;MN@k*J8;%}!bi z?po3jkUv9$N>T&#IPa=7L8I-g*B*uorP#dV6FbKW)PsUVrvkd+O-&$IV?q_~;yt|8 
zrxGOv>nbo1QL~N_1t$pTsDP5zVpH3(fiHJF!<^g7kruK0>iV`58K)60{zIw5JqSwm z`oQqV-Hd~vSafR9E<7c7S$0GdympHT-GzIPXRsmuKi@MxU6T!J1!x-IK?(YGK_XRHxj0?v;ju!O&Q8{qSM^bvK!pnAdSFf0Hz z^<%sp3osbvXYu18L0g8S_NKWq8f;n!UYUQMcAgTx z(Lq?5y81h{#u~u6?iGgd$=kKL`$acSKF=Gv#ENA++u{eo08pW<`ac!S@5K$}3$pRZ zJ?{_sNfH!;p4_v!@;kK=V0&EMKW0vg0Q7<)t>{5AMJ*rx0aP>lKI)61a6nX=Up3Pw zj|3x^t@}tz??LIb^WaWp^T%5lu+}Dj04-gHFY_#0%I&rdP=p>sT0D=VRyw-7L}xSs z&L|GwtY0ZQQbMfq^A1#2tL~&!ZuKUEK*hZgL(~@Fvh3bTpH-y09+xppQr522kZ|xB z3(*isy{DH3B&ac7<g`ez#I&sV5v?lijp6^gYay1cFqE`sY+d)-qf}fN#T1Rk1 za70HhkND#v3)GpHASPznPMy|bYQg|7qpE{OG~+>OykHDBChYj_PnAExj|Z?gzYAa> zv`LoeOcWyI{Dk%&(g2;q4mG4vT)2(Db}z3b?YH_KoXbt~W@T@7L?n+h6V3#xlhc8W zGC(2hu7gEaUiKxkG1K(3?plix$ z_&{$?vhO5LS6;Y%{I2T5JlcE1QcTp$-|tEvsoYDcT<50D5qAFmVYOY? zHs`n}8!VSkr{T}3K>>l40^S7r%E9 Z2kLH^2BmLsep>;uwQ|HhKI(Vn{{a}}(B}XE literal 0 HcmV?d00001 diff --git a/pandasai_agent/chat_analysis.py b/pandasai_agent/chat_analysis.py index bf7334a..afc0101 100644 --- a/pandasai_agent/chat_analysis.py +++ b/pandasai_agent/chat_analysis.py @@ -8,14 +8,14 @@ from pandasai.llm import OpenAI, BambooLLM, AzureOpenAI from pandasai.responses.streamlit_response import StreamlitResponse import pandasai_agent.config as config -from helper import detect_image_path +from smolagents_agent.helper import detect_image_path from pandasai.exceptions import PandasAIApiCallError import sys from st_aggrid import AgGrid from langchain_groq.chat_models import ChatGroq import json from dotenv import load_dotenv -from generate_html_report import generate_html_from_json +from pandasai_agent.generate_html_report import generate_html_from_json import logging logging.basicConfig(level=logging.DEBUG) diff --git a/tools/chat_analysis.xml b/tools/chat_analysis.xml index b56c576..8cf64d6 100644 --- a/tools/chat_analysis.xml +++ b/tools/chat_analysis.xml @@ -24,18 +24,19 @@ mkdir -p outputs_dir && #set $sanitized_input_dataset = re.sub('[^\w\-_\.]', '_', $input_dataset.element_identifier.strip()) ln -sf '$input_dataset' 
"./${sanitized_input_dataset}"; + ln -sf '$input_dataset' "outputs_dir/${sanitized_input_dataset}"; echo "sanitized input dataset: $sanitized_input_dataset" && echo "input dataset: $input_dataset" && echo "$input_dataset.ext" && - ln -sf $__tool_directory__/.env /ChatAnalysis/.env && - python /ChatAnalysis/chat_dspy.py + python /ChatAnalysis/dspy_agent/chat_dspy.py --user_id "$__user_name__" --openai_key_file "$openai_api_key_file" --groq_key_file "$groq_api_key_file" --chat_history "$chat_history" --outputs_dir "./outputs_dir" --input_file "$input_dataset" - --input_type "$input_dataset.ext" && + --input_type "$input_dataset.ext" + --file_name "${sanitized_input_dataset}" && echo "finished" && echo "working dir now: '.' "