From 77805cc9c27212da891efe7870758b0a6138fce6 Mon Sep 17 00:00:00 2001
From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com>
Date: Tue, 13 May 2025 15:24:19 -0400
Subject: [PATCH 1/7] cache the summary file and stop using display_response

---
 Chat.py | 135 ++++++++++++++++++++------------------------------
 1 file changed, 49 insertions(+), 86 deletions(-)

diff --git a/Chat.py b/Chat.py
index 978fe73..589551c 100644
--- a/Chat.py
+++ b/Chat.py
@@ -472,7 +472,11 @@ def preview_dataset(self, file):
         current_data_type = self.input_data_type
         pandas_compatible_types = ['csv', 'tsv', 'xlsx', 'xls', 'json', 'parquet', 'h5', 'bed']
         if current_data_type in pandas_compatible_types and isinstance(data, pd.DataFrame):
-            generated_summary_path = self.generate_and_save_pandas_summary_csv(data)
+            if not st.session_state.get("summary_stats_csv_path", None):
+                generated_summary_path = self.generate_and_save_pandas_summary_csv(data)
+                st.session_state["summary_stats_csv_path"] = generated_summary_path
+            else:
+                generated_summary_path = st.session_state.get("summary_stats_csv_path", None)
             if generated_summary_path:
                 self.summary_stats_csv_path = generated_summary_path # Store path

@@ -513,6 +517,7 @@ def save_chat_history(self):
             "analysis_file_path": st.session_state.get("analysis_file_path", ""),
             "input_data_type": st.session_state.get("input_data_type", ""),
             "bookmarks": st.session_state.get("bookmarks", []),
+            "summary_stats_csv_path": st.session_state.get("summary_stats_csv_path", ""),
         }
         with open(self.chat_hisory_file, "w") as f:
             json.dump(history, f, indent=2)

@@ -535,60 +540,14 @@ def load_chat_history(self):
                 st.session_state["analysis_file_path"] = history.get("analysis_file_path", "")
                 st.session_state["input_data_type"] = history.get("input_data_type", "")
                 st.session_state["bookmarks"] = history.get("bookmarks", [])
+                st.session_state["summary_stats_csv_path"] = history.get("summary_stats_csv_path", "")
             else:
                 # File is empty; initialize session state with defaults.
st.session_state["messages"] = [] st.session_state["eda_report"] = "" st.session_state["memory"] = deque(maxlen=15) st.session_state["bookmarks"] = [] - - - def display_bookmark_manager(self): - st.title( "Bookmark Manager") - bookmarks = st.session_state.get("bookmarks", []) - if not bookmarks: - st.info("No bookmarks saved.") - return - - for i, b in enumerate(bookmarks): - if not b: - continue - rawq = b.get("question", "Unknown question") - rawa = b.get("answer", "No answer saved") - - question = rawq if rawq else "Unknown question" - answer = rawa if rawa else "No answer saved" - with st.expander(f"Bookmark {i + 1}: {question[:60]}"): - st.markdown(f"**Question:** {question}") - st.markdown(f"**Answer:**\n{answer}") - - if b.get("plots"): - st.markdown("**Saved Plots:**") - for path in b["plots"]: - if os.path.exists(path): - st.image(path, caption=os.path.basename(path)) - - if b.get("files"): - st.markdown("**Saved Files:**") - for path in b["files"]: - if os.path.exists(path): - with open(path, "rb") as f: - st.download_button( - label=f"Download {os.path.basename(path)}", - data=f, - file_name=os.path.basename(path), - key=f"bm_dl_{i}_{path}" - ) - - # if st.button("πŸ” Rerun this query", key=f"rerun_bookmark_{i}"): - # st.session_state["prefilled_input"] = b["question"] - # - - # if st.button("πŸ—‘οΈ Delete", key=f"delete_bookmark_{i}"): - # st.session_state["bookmarks"].pop(i) - # self.save_chat_history() - # st.success("Bookmark deleted.") - # st.experimental_rerun() + st.session_state["summary_stats_csv_path"] = "" def load_dataset_preview(self): @@ -1045,14 +1004,15 @@ def run_eda(self, temp_file_path): st.session_state["messages"].append(eda_result_message) st.session_state["memory"].append(f"Assistant (EDA): {report_text}") - self.display_response( - explanation=report_text, - plot_paths=parsed.get("plots", []) if parsed else [], - file_paths=file_paths, - next_steps_suggestion=" \n* ".join(parsed.get("next_steps_suggestion", [])) if parsed else "", - middle_steps=middle_steps - ) + # self.display_response( + # explanation=report_text, + # plot_paths=parsed.get("plots", []) if parsed else [], + # file_paths=file_paths, + # next_steps_suggestion=" \n* ".join(parsed.get("next_steps_suggestion", [])) if parsed else "", + # middle_steps=middle_steps + # ) self.save_chat_history() + st.rerun() except Exception as e: st.error(f"Error during EDA: {e}") @@ -1105,14 +1065,15 @@ def process_response(self, response, middle_steps=""): }) st.session_state["memory"].append(f"{role.capitalize()}: Multiple candidate solutions generated.") # Display candidate solutions - self.display_response( - explanation="Multiple candidate solutions generated.", - plot_paths=[], - file_paths=[], - next_steps_suggestion=next_steps, - middle_steps=middle_steps, - candidate_solutions=candidate_list - ) + # self.display_response( + # explanation="Multiple candidate solutions generated.", + # plot_paths=[], + # file_paths=[], + # next_steps_suggestion=next_steps, + # middle_steps=middle_steps, + # candidate_solutions=candidate_list + # ) + else: message = { @@ -1133,13 +1094,13 @@ def process_response(self, response, middle_steps=""): "middle_steps": message["middle_steps"] }) st.session_state["memory"].append(f"{role.capitalize()}: {message['explanation']}") - self.display_response( - message["explanation"], - message["plots"], - message["files"], - message["next_steps_suggestion"], - message["middle_steps"] - ) + # self.display_response( + # message["explanation"], + # message["plots"], + # 
message["files"], + # message["next_steps_suggestion"], + # message["middle_steps"] + # ) else: st.session_state["messages"].append({ @@ -1162,14 +1123,14 @@ def process_response(self, response, middle_steps=""): "middle_steps": middle_steps }) st.session_state["memory"].append("Assistant: Multiple candidate solutions generated.") - self.display_response( - explanation="", - plot_paths=[], - file_paths=[], - next_steps_suggestion=next_steps, - middle_steps=middle_steps, - candidate_solutions=candidate_list - ) + # self.display_response( + # explanation="", + # plot_paths=[], + # file_paths=[], + # next_steps_suggestion=next_steps, + # middle_steps=middle_steps, + # candidate_solutions=candidate_list + # ) else: message = { @@ -1191,13 +1152,13 @@ def process_response(self, response, middle_steps=""): "middle_steps": message["middle_steps"] }) st.session_state["memory"].append("Assistant: " + message["explanation"]) - self.display_response( - message["explanation"], - message["plots"], - message["files"], - message["next_steps_suggestion"], - message["middle_steps"] - ) + # self.display_response( + # message["explanation"], + # message["plots"], + # message["files"], + # message["next_steps_suggestion"], + # message["middle_steps"] + # ) # Case 3: Response is a plain string. @@ -1217,6 +1178,8 @@ def process_response(self, response, middle_steps=""): "role": "assistant", "content": f"Response received:\n\n{response}\n" }) + self.save_chat_history() + st.rerun() From 76e6552041db1466c737a4501d5105ecc54fba04 Mon Sep 17 00:00:00 2001 From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com> Date: Tue, 13 May 2025 15:25:50 -0400 Subject: [PATCH 2/7] deleted display_response --- Chat.py | 144 -------------------------------------------------------- 1 file changed, 144 deletions(-) diff --git a/Chat.py b/Chat.py index 589551c..80a709a 100644 --- a/Chat.py +++ b/Chat.py @@ -618,111 +618,6 @@ def display_middle_steps(self, steps_list): elif seg_type == "code": st.code(seg_text) - def display_response(self, explanation, plot_paths, file_paths, next_steps_suggestion, middle_steps="", candidate_solutions=None): - with st.chat_message("assistant"): - # Clean explanation and next steps text. - explanation = clean_text(explanation) - next_steps_suggestion = clean_text(next_steps_suggestion) - - msg_idx = len(st.session_state["messages"]) - 1 - - # If candidate solutions are provided, display them separately. - if candidate_solutions is not None: - st.markdown("### Candidate Solutions") - for idx, candidate in enumerate(candidate_solutions, start=1): - with st.expander(f"Candidate {idx}: {candidate.get('option', 'Option')}"): - st.markdown(f"**Explanation:** {candidate.get('explanation', '')}") - st.markdown(f"**Pros:** {candidate.get('pros', '')}") - st.markdown(f"**Cons:** {candidate.get('cons', '')}") - # A button to allow the user to refine this candidate solution. - if st.button("Refine this solution", key=f"refine_candidate_{msg_idx}_{idx}"): - # Pre-fill input with candidate details for refinement. - st.session_state["prefilled_input"] = candidate.get("option", "") + " " + candidate.get("explanation", "") - else: - # Display the explanation text normally. - if "count" in explanation and "mean" in explanation and "std" in explanation: - st.code(explanation) - else: - st.markdown(explanation) - - # Display intermediate steps if available. 
- if middle_steps: - # self.display_middle_steps(middle_steps) - with st.expander("View Intermediate Steps"): - st.markdown(middle_steps) - - # Display any generated plots. - for plot_path in plot_paths: - if plot_path and os.path.exists(plot_path): - image = Image.open(plot_path) - file_name = os.path.basename(plot_path) - file_name_no_ext = os.path.splitext(file_name)[0] - st.image(image, caption=file_name_no_ext) - - # Display file download buttons for any generated files. - for file_path in file_paths: - if file_path and os.path.exists(file_path): - - if file_path.lower().endswith(".csv"): - try: - df = pd.read_csv(file_path) - st.markdown(f"Preview of **{os.path.basename(file_path)}**:") - st.dataframe(df) - except Exception as e: - print(f"Error reading CSV file {os.path.basename(file_path)}: {e}") - if file_path.lower().endswith(".tsv"): - try: - df = pd.read_csv(file_path, sep="\t") - st.markdown(f"Preview of **{os.path.basename(file_path)}**:") - st.dataframe(df) - except Exception as e: - print(f"Error reading CSV file {os.path.basename(file_path)}: {e}") - - unique_key = str(uuid.uuid4()) - with open(file_path, "rb") as f: - st.download_button( - label=f"Download {os.path.basename(file_path)}", - data=f, - file_name=os.path.basename(file_path), - key=f"download_{unique_key}" - ) - - bookmark_data = { - "question": st.session_state["messages"][-2]["content"] if len(st.session_state["messages"]) > 1 else "Unknown", - "answer": explanation, - "plots": plot_paths, - "files": file_paths, - } - - if st.button("πŸ”– Bookmark this response", key=f"bookmark_{msg_idx}"): - st.session_state["bookmarks"].append(bookmark_data) - st.session_state["messages"][msg_idx]["bookmarked"] = True - self.save_chat_history() - st.rerun() - st.success("Response bookmarked!") - - - if st.session_state.get("db_available", False): - if not st.session_state.get(f"feedback_submitted_{msg_idx}", False): - col1, col2 = st.columns(2) - # The on_click callback immediately stores the feedback. - col1.button("πŸ‘", key=f"thumbs_up_{msg_idx}", on_click=self.submit_feedback_response, args=("Yes", msg_idx)) - col2.button("πŸ‘Ž", key=f"thumbs_down_{msg_idx}", on_click=self.submit_feedback_response, args=("No", msg_idx)) - else: - st.info("Feedback recorded!") - # Allow the user to add or update an optional comment. 
- comment = st.text_area("Optional comment:", key=f"feedback_comment_{msg_idx}") - if st.button("Update Comment", key=f"update_comment_{msg_idx}"): - feedback_id = st.session_state.get(f"feedback_id_{msg_idx}") - update_feedback_comment(feedback_id, comment) - st.success("Comment updated!") - - if not candidate_solutions and next_steps_suggestion: - suggestions = [s.strip() for s in next_steps_suggestion.split("\n") if s.strip()] - self.display_suggestion_buttons(suggestions) - st.markdown("Please let me know if you want to proceed with any of the suggestions or ask any other questions.") - - def display_chat_history(self): messages = st.session_state.get("messages", []) @@ -1004,13 +899,6 @@ def run_eda(self, temp_file_path): st.session_state["messages"].append(eda_result_message) st.session_state["memory"].append(f"Assistant (EDA): {report_text}") - # self.display_response( - # explanation=report_text, - # plot_paths=parsed.get("plots", []) if parsed else [], - # file_paths=file_paths, - # next_steps_suggestion=" \n* ".join(parsed.get("next_steps_suggestion", [])) if parsed else "", - # middle_steps=middle_steps - # ) self.save_chat_history() st.rerun() except Exception as e: @@ -1064,16 +952,6 @@ def process_response(self, response, middle_steps=""): "middle_steps": middle_steps }) st.session_state["memory"].append(f"{role.capitalize()}: Multiple candidate solutions generated.") - # Display candidate solutions - # self.display_response( - # explanation="Multiple candidate solutions generated.", - # plot_paths=[], - # file_paths=[], - # next_steps_suggestion=next_steps, - # middle_steps=middle_steps, - # candidate_solutions=candidate_list - # ) - else: message = { @@ -1094,13 +972,6 @@ def process_response(self, response, middle_steps=""): "middle_steps": message["middle_steps"] }) st.session_state["memory"].append(f"{role.capitalize()}: {message['explanation']}") - # self.display_response( - # message["explanation"], - # message["plots"], - # message["files"], - # message["next_steps_suggestion"], - # message["middle_steps"] - # ) else: st.session_state["messages"].append({ @@ -1123,14 +994,6 @@ def process_response(self, response, middle_steps=""): "middle_steps": middle_steps }) st.session_state["memory"].append("Assistant: Multiple candidate solutions generated.") - # self.display_response( - # explanation="", - # plot_paths=[], - # file_paths=[], - # next_steps_suggestion=next_steps, - # middle_steps=middle_steps, - # candidate_solutions=candidate_list - # ) else: message = { @@ -1152,13 +1015,6 @@ def process_response(self, response, middle_steps=""): "middle_steps": message["middle_steps"] }) st.session_state["memory"].append("Assistant: " + message["explanation"]) - # self.display_response( - # message["explanation"], - # message["plots"], - # message["files"], - # message["next_steps_suggestion"], - # message["middle_steps"] - # ) # Case 3: Response is a plain string. From 69b9b176805eb7823f2e89c7151ee6247a1c7fc5 Mon Sep 17 00:00:00 2001 From: Qiu <56094690+qchiujunhao@users.noreply.github.com> Date: Wed, 14 May 2025 10:30:43 -0400 Subject: [PATCH 3/7] fix an issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, when refreshing the app on the Bookmarks page, it would show 'No bookmarks available' even if bookmarks existed. This happened because the main Chat page didn’t run and therefore didn’t load the chat history. This issue is now fixed. 
---
 Chat.py              | 17 +++++-----
 pages/1_Bookmarks.py | 63 +++++++++++++++++++-------------------
 2 files changed, 38 insertions(+), 42 deletions(-)

diff --git a/Chat.py b/Chat.py
index 80a709a..ae03874 100644
--- a/Chat.py
+++ b/Chat.py
@@ -521,6 +521,11 @@ def save_chat_history(self):
         }
         with open(self.chat_hisory_file, "w") as f:
             json.dump(history, f, indent=2)
+        bookmark_history = {
+            "bookmarks": st.session_state.get("bookmarks", []),
+        }
+        with open("bookmarks.json", "w") as f:
+            json.dump(bookmark_history, f, indent=2)

     def load_chat_history(self):
@@ -771,8 +776,8 @@ def get_agent_prompt(self, dataset_path, user_question, question_type: int=2):
                 "- Always consider to generate plots or files to support your answer.\n"
                 "- Always call the final_answer tool, providing the final answer in the following dictionary format (do not format as a JSON code block):\n"
                 '{ "explanation": ["Your explanation here, in plain text. This can include detailed information or step-by-step guidance."], '
-                '"plots": ["" (leave empty if no plots are needed)], '
-                '"files": ["" (leave empty if no files are needed)], '
+                '"plots": ["" (leave the list empty if no plots are needed)], '
+                '"files": ["" (leave the list empty if no files are needed)], '
                 '"next_steps_suggestion": ["List of possible next questions the user could ask to gain further insights. They should be questions. Only include this when the user has not explicitly asked for suggestions."] }'
             )
         elif question_type == 1:
@@ -786,8 +791,8 @@ def get_agent_prompt(self, dataset_path, user_question, question_type: int=2):
                 "- Always consider to generate plots or files to support your answer.\n"
                 "- Always call the final_answer tool, providing the final answer in the following dictionary format (do not format as a JSON code block):\n"
                 '{ "explanation": ["Your explanation here, in plain text. This can include detailed information or step-by-step guidance."], '
-                '"plots": ["" (leave empty if no plots are needed)], '
-                '"files": ["" (leave empty if no files are needed)], '
+                '"plots": ["" (leave the list empty if no plots are needed)], '
+                '"files": ["" (leave the list empty if no files are needed)], '
                 '"next_steps_suggestion": ["List of possible next questions the user could ask to gain further insights. They should be questions. Only include this when the user has not explicitly asked for suggestions."] }'
             )
         else:
@@ -808,7 +813,7 @@ def get_agent_prompt(self, dataset_path, user_question, question_type: int=2):
                 "- Always consider to generate plots or files to support your answer.\n"
                 "- Always call the final_answer tool, providing the final answer in one of the following dictionary formats (do not format as a JSON code block):\n\n"
                 "Simple answer format:\n"
-                '{ "explanation": ["Your explanation text. in plain text. This can include detailed information or step-by-step guidance."], "plots": [""], "files": [""], "next_steps_suggestion": ["Suggestion 1", "Suggestion 2"] }\n\n'
+                '{ "explanation": ["Your explanation text. in plain text. This can include detailed information or step-by-step guidance."], "plots": ["" (leave the list empty if no plots are needed)], "files": ["" (leave the list empty if no files are needed)], "next_steps_suggestion": ["Suggestion 1", "Suggestion 2"] }\n\n'
                 "Multiple candidate solutions format:\n"
                 '{ "candidate_solutions": [ { "option": "Solution 1", "explanation": "Detailed explanation...", "pros": "Pros...", "cons": "Cons..." }, { "option": "Solution 2", "explanation": "Detailed explanation...", "pros": "Pros...", "cons": "Cons..." }, { "option": "Solution 3", "explanation": "Detailed explanation...", "pros": "Pros...", "cons": "Cons..." } ], "next_steps_suggestion": ["Which option would you like to refine?", "Or ask for more details on a candidate solution."] }'
             )
@@ -886,7 +891,7 @@ def run_eda(self, temp_file_path):

                 file_paths = parsed.get("files", [])
-                file_paths = [eda_file_path] + file_paths
+                file_paths = [eda_file_path] + file_paths if file_paths else [eda_file_path]

                 eda_result_message = {
                     "role": "assistant",

diff --git a/pages/1_Bookmarks.py b/pages/1_Bookmarks.py
index 1f1c78b..5d75da8 100644
--- a/pages/1_Bookmarks.py
+++ b/pages/1_Bookmarks.py
@@ -1,7 +1,8 @@
 # pages/1_Bookmarks.py
 import streamlit as st
 import os
-from PIL import Image # If displaying images from paths
+from PIL import Image
+import json

 st.set_page_config(
@@ -69,13 +70,18 @@

 st.title("🔖 Bookmark Manager")

-# Ensure necessary session state variables are accessible
-# These should have been set by Chat_Bot.py when it first ran.
-output_dir = st.session_state.get("generate_file_path", "outputs_smolagents") # Default if not found
+def load_chat_history():
+    if os.path.exists("bookmarks.json"):
+        with open("bookmarks.json", "r") as f:
+            file_contents = f.read().strip()
+        if file_contents:
+            history = json.loads(file_contents)
+            st.session_state["bookmarks"] = history.get("bookmarks", [])
+        else:
+            st.session_state["bookmarks"] = []

-if "bookmarks" not in st.session_state:
-    st.session_state["bookmarks"] = [] # Initialize if somehow not present
+load_chat_history()

 bookmarks = st.session_state.get("bookmarks", [])
 if not bookmarks:
@@ -93,36 +99,33 @@
     file_paths = b_data.get("files", [])

     with st.expander(f"Bookmark {i + 1}: {question[:60]}"):
-        st.markdown(f"**❓ Question:**\n```\n{question}\n```")
-        st.markdown(f"**💡 Answer:**\n{answer}") # Assuming answer is markdown-compatible
+        st.markdown(f"**❓ Question:**\n{question}\n")
+        st.markdown(f"**💡 Answer:**\n{answer}")

-        if plot_paths:
+        if plot_paths and not (len(plot_paths) == 1 and plot_paths[0] == ""):
             st.markdown("**📊 Saved Plots:**")
             for plot_path_in_bookmark in plot_paths:
-                # Construct full path if paths are stored relative or just basenames
-                # Assuming paths in bookmark_data are already correct relative to execution
-                # or are absolute. If relative to output_dir, prepend it.
-                # For simplicity, let's assume plot_path_in_bookmark is usable as is
-                # or is a full path. If it's just a basename:
-                # actual_plot_path = os.path.join(output_dir, os.path.basename(plot_path_in_bookmark))
-                actual_plot_path = plot_path_in_bookmark # Use this if paths are stored fully qualified or correctly relative
-
-                if os.path.exists(actual_plot_path):
+                actual_plot_path = plot_path_in_bookmark
+                if actual_plot_path == "":
+                    continue
+
+                if actual_plot_path and os.path.exists(actual_plot_path):
                     try:
                         image = Image.open(actual_plot_path)
                         st.image(image, caption=os.path.basename(actual_plot_path))
                     except Exception as e:
                         st.error(f"Could not load plot {os.path.basename(actual_plot_path)}: {e}")
                 else:
-                    st.warning(f"Plot not found: {actual_plot_path}")
+                    pass

-        if file_paths:
+        if file_paths and not (len(file_paths) == 1 and file_paths[0] == ""):
             st.markdown("**📄 Saved Files:**")
             for file_path_in_bookmark in file_paths:
-                # actual_file_path = os.path.join(output_dir, os.path.basename(file_path_in_bookmark))
-                actual_file_path = file_path_in_bookmark # Similar assumption as plots
+                actual_file_path = file_path_in_bookmark
+                if actual_file_path == "":
+                    continue

-                if os.path.exists(actual_file_path):
+                if actual_file_path and os.path.exists(actual_file_path):
                     try:
                         with open(actual_file_path, "rb") as f_download:
                             st.download_button(
@@ -134,16 +137,4 @@
                     except Exception as e:
                         st.error(f"Could not prepare file {os.path.basename(actual_file_path)} for download: {e}")
                 else:
-                    st.warning(f"File not found: {actual_file_path}")
-
-    # Add delete/rerun functionality if desired (would need to modify st.session_state.bookmarks and save)
-    # e.g., if st.button("Delete Bookmark", key=f"delete_bm_{i}"):
-    #     st.session_state.bookmarks.pop(i)
-    #     # Need a way to trigger save_chat_history() from StreamlitApp if it's responsible,
-    #     # or manage bookmark saving directly via session state + json persistence here.
-    #     # For now, keep it simple.
-    #     st.experimental_rerun()
-
-# If you have common sidebar elements (like API config) that should appear on all pages,
-# you might need to duplicate that logic here or move it to a shared utility function.
-# For now, the Bookmarks page is simple and doesn't re-declare the LLM config sidebar.
\ No newline at end of file
+                    pass

From d00ab8d19afdd17dd6c07e1ef5025411e7c0e5b5 Mon Sep 17 00:00:00 2001
From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com>
Date: Wed, 14 May 2025 12:17:41 -0400
Subject: [PATCH 4/7] prompt and an EDA button changed

---
 .gitignore | 3 ++-
 Chat.py    | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3e9cdb2..692c87b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,4 +22,5 @@ groq_api_key.txt
 *.pdf
 generate_md_report.py
 *.md
-outputs/*
\ No newline at end of file
+outputs/*
+user_config_*
\ No newline at end of file

diff --git a/Chat.py b/Chat.py
index ae03874..65bb14f 100644
--- a/Chat.py
+++ b/Chat.py
@@ -774,6 +774,7 @@ def get_agent_prompt(self, dataset_path, user_question, question_type: int=2):
                 f"- You should find an appropriate method to generate plots for this query. If a plot or file is generated, save it in the directory {self.output_dir} with a random numerical suffix to prevent overwrites.\n"
                 "- Do not generate filenames like 'random_forest_model_XXXX.joblib'.\n"
                 "- Always consider to generate plots or files to support your answer.\n"
+                f"- If plots are generated, if possible, put the data used to generate the plots in csv files in the {self.output_dir} directory.\n"
                 "- Always call the final_answer tool, providing the final answer in the following dictionary format (do not format as a JSON code block):\n"
                 '{ "explanation": ["Your explanation here, in plain text. This can include detailed information or step-by-step guidance."], '
                 '"plots": ["" (leave the list empty if no plots are needed)], '
@@ -811,6 +812,7 @@ def get_agent_prompt(self, dataset_path, user_question, question_type: int=2):
                 f"- If a plot or file is generated, save it in the {self.output_dir} directory with a random numerical suffix to prevent overwrites.\n"
                 "- Do not generate filenames like 'random_forest_model_XXXX.joblib'.\n"
                 "- Always consider to generate plots or files to support your answer.\n"
+                f"- If plots are generated, if possible, put the data used to generate the plots in csv files in the {self.output_dir} directory.\n"
                 "- Always call the final_answer tool, providing the final answer in one of the following dictionary formats (do not format as a JSON code block):\n\n"
                 "Simple answer format:\n"
                 '{ "explanation": ["Your explanation text. in plain text. This can include detailed information or step-by-step guidance."], "plots": ["" (leave the list empty if no plots are needed)], "files": ["" (leave the list empty if no files are needed)], "next_steps_suggestion": ["Suggestion 1", "Suggestion 2"] }\n\n'
@@ -1133,8 +1135,8 @@ def run(self):
         if os.path.exists(st.session_state["analysis_file_path"]):
             if st.sidebar.button("Correlation Matrix", key="corr_matrix"):
                 self.handle_user_input(st.session_state["analysis_file_path"], "Show the correlation matrix of the features.")
-            if st.sidebar.button("Missing Values", key="missing_values"):
-                self.handle_user_input(st.session_state["analysis_file_path"], "What are the missing values in the dataset?")
+            if st.sidebar.button("Identify missing values & drop sparse columns", key="missing_values"):
+                self.handle_user_input(st.session_state["analysis_file_path"], "Are there any missing values in the dataset? If so, which columns contain them? If applicable, remove the columns with mostly missing values and return the modified dataset.Only return the dataset if it was modified.")
             if st.sidebar.button("Numerical Feature Distribution", key="num_dist"):
                 self.handle_user_input(st.session_state["analysis_file_path"], "Show the distribution of numerical features.")
             # if st.sidebar.button("Summary Statistics", key="summary_stats"):

From b1ad26c66e90ff6e0c7fb227265067da35d5cf7c Mon Sep 17 00:00:00 2001
From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com>
Date: Wed, 28 May 2025 15:29:27 -0400
Subject: [PATCH 5/7] change Dockerfile for nicegui + smolagents

---
 Dockerfile | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 7569f97..50cc98f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -30,11 +30,9 @@ COPY . .
 RUN pip install --no-cache-dir --upgrade pip

 # Install Python dependencies from requirements.txt
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir -r requirements_nicegui.txt

-# Expose Streamlit’s port
-EXPOSE 8501
-ENV STREAMLIT_SERVER_PORT=8501
+EXPOSE 8090

 # Adjust permissions if needed
 RUN chmod -R 777 /ChatAnalysis
\ No newline at end of file

From 801945a1ca407c3946b7d237798a56a029a4d3ad Mon Sep 17 00:00:00 2001
From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com>
Date: Fri, 13 Jun 2025 21:32:23 -0400
Subject: [PATCH 6/7] change to 0.3.0

---
 .dockerignore           |  9 ++++++++-
 .gitignore              |  6 +++++-
 Dockerfile              |  4 ++--
 tools/chat_analysis.xml | 27 +++++++++++++--------------
 4 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index 414ec91..6afb4ca 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -35,4 +35,11 @@ generate_files/*
 generate_files_0/*
 generated_file/*
 generated_files/*
-user_config_openai.key
\ No newline at end of file
+user_config_openai.key
+test_uploader.*
+test_display_gxy.py
+outputs_dir/*
+outputs_smolagents_nicegui/*
+!examples.json
+__pycache__
+.*key

diff --git a/.gitignore b/.gitignore
index 692c87b..d0bcc30 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,4 +23,8 @@ groq_api_key.txt
 generate_md_report.py
 *.md
 outputs/*
-user_config_*
\ No newline at end of file
+outputs_dir/
+user_config_*
+*.pkl
+test*.py
+test*.xml
\ No newline at end of file

diff --git a/Dockerfile b/Dockerfile
index 50cc98f..1d69dd5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -30,9 +30,9 @@ COPY . .
 RUN pip install --no-cache-dir --upgrade pip

 # Install Python dependencies from requirements.txt
-RUN pip install --no-cache-dir -r requirements_nicegui.txt
+RUN pip install --no-cache-dir -r requirements_nicegui_dspy.txt

-EXPOSE 8090
+EXPOSE 9090

 # Adjust permissions if needed
 RUN chmod -R 777 /ChatAnalysis
\ No newline at end of file

diff --git a/tools/chat_analysis.xml b/tools/chat_analysis.xml
index 4e50efb..e817127 100644
--- a/tools/chat_analysis.xml
+++ b/tools/chat_analysis.xml
@@ -1,13 +1,13 @@
-
+
 
-        quay.io/goeckslab/chatanalysis:release
+        quay.io/goeckslab/chatanalysis:0.3.0
 
-        8501
+        9090
         /
 
@@ -19,23 +19,22 @@
         #import re
         echo "Starting Streamlit" &&
         echo "working dir now: `pwd`" &&
-        mkdir -p generated_files &&
+        mkdir -p outputs_dir &&
         #set $sanitized_input_dataset = re.sub('[^\w\-_\.]', '_', $input_dataset.element_identifier.strip())
         ln -sf '$input_dataset' "./${sanitized_input_dataset}";
         echo "sanitized input dataset: $sanitized_input_dataset" &&
        echo "input dataset: $input_dataset" &&
        echo "$input_dataset.ext" &&
        ln -sf $__tool_directory__/.env /ChatAnalysis/.env &&
-        streamlit run /ChatAnalysis/Chat.py
-        "$__user_email__"
-        "$openai_api_key_file"
-        "$groq_api_key_file"
-        "$chat_history"
-        "./generated_files"
-        "$input_dataset"
-        "$input_dataset.ext"
-        --browser.gatherUsageStats false &&
-        echo "Streamlit finished" &&
+        python /ChatAnalysis/chat_dspy.py
+        --user_id "$__user_email__"
+        --openai_key_file "$openai_api_key_file"
+        --groq_key_file "$groq_api_key_file"
+        --chat_history "$chat_history"
+        --outputs_dir "./outputs_dir"
+        --input_file "$input_dataset"
+        --input_type "$input_dataset.ext" &&
+        echo "finished" &&
         echo "working dir now: '.'"
     ]]>

From 09e7b6c98dd8e5ddf4e8d1a5b83c35203ce69410 Mon Sep 17 00:00:00 2001
From: JunhaoQiu <56094690+qchiujunhao@users.noreply.github.com>
Date: Fri, 13 Jun 2025 21:38:51 -0400
Subject: [PATCH 7/7] Copilot suggestion

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 Chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Chat.py b/Chat.py
index 65bb14f..27810bb 100644
--- a/Chat.py
+++ b/Chat.py
@@ -1136,7 +1136,7 @@ def run(self):
             if st.sidebar.button("Correlation Matrix", key="corr_matrix"):
                 self.handle_user_input(st.session_state["analysis_file_path"], "Show the correlation matrix of the features.")
             if st.sidebar.button("Identify missing values & drop sparse columns", key="missing_values"):
-                self.handle_user_input(st.session_state["analysis_file_path"], "Are there any missing values in the dataset? If so, which columns contain them? If applicable, remove the columns with mostly missing values and return the modified dataset.Only return the dataset if it was modified.")
+                self.handle_user_input(st.session_state["analysis_file_path"], "Are there any missing values in the dataset? If so, which columns contain them? If applicable, remove the columns with mostly missing values and return the modified dataset. Only return the dataset if it was modified.")
             if st.sidebar.button("Numerical Feature Distribution", key="num_dist"):
                 self.handle_user_input(st.session_state["analysis_file_path"], "Show the distribution of numerical features.")
             # if st.sidebar.button("Summary Statistics", key="summary_stats"):