# backend.py

from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from datetime import datetime
import hashlib
import json
import os
import base64
from io import BytesIO

import chromadb
from chromadb.utils import embedding_functions
from deep_translator import GoogleTranslator
import requests
from PIL import Image
from langchain_core.tools import tool

import streamlit as st
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_ollama import ChatOllama
from langchain.globals import set_debug
import wolframalpha

@dataclass
class Config:
    """Application settings; creating an instance also creates the data folders."""
    # Lowest similarity score at which a cached answer is reused.
    SIMILARITY_THRESHOLD: float = 0.90
    # Directory handed to the persistent ChromaDB client.
    CHROMA_PATH: str = "./chromadb"
    # Directory where downloaded plot images are written.
    STORAGE_DIR: str = "storage/qa_images"
    # Retry budget (not referenced by the code visible in this file).
    MAX_RETRIES: int = 3
    SOURCE_LANG: str = "uk"  # Ukrainian
    TARGET_LANG: str = "en"  # English

    def __post_init__(self):
        # Make sure both on-disk locations exist before anything uses them.
        for directory in (self.STORAGE_DIR, self.CHROMA_PATH):
            Path(directory).mkdir(parents=True, exist_ok=True)

class Translator:
    """Two-way translator backed by one GoogleTranslator instance per direction.

    Translation failures are never raised: both public methods return a
    string of the form ``"Translation error: <details>"`` instead.
    """

    def __init__(self, source_lang: str, target_lang: str):
        self.source_lang, self.target_lang = source_lang, target_lang
        # Forward direction: source_lang -> target_lang.
        self.translator_to_en = GoogleTranslator(source=source_lang, target=target_lang)
        # Reverse direction: target_lang -> source_lang.
        self.translator_from_en = GoogleTranslator(source=target_lang, target=source_lang)

    def to_english(self, text: str) -> str:
        """Translate *text* from the source language to the target language."""
        try:
            translated = self.translator_to_en.translate(text)
        except Exception as exc:
            return f"Translation error: {exc!s}"
        return translated

    def to_ukrainian(self, text: str) -> str:
        """Translate *text* from the target language back to the source language."""
        try:
            translated = self.translator_from_en.translate(text)
        except Exception as exc:
            return f"Translation error: {exc!s}"
        return translated

def setup_chromadb():
    """Open (or create) the persistent "math_qa_ukr" ChromaDB collection.

    Returns:
        A ``(collection, embedding_function)`` pair.

    Raises:
        Exception: with a "ChromaDB initialization failed: ..." message when
            any step of the setup fails; the message is also printed.
    """
    settings = Config()

    try:
        # The storage directory has to exist before the client opens it.
        os.makedirs(settings.CHROMA_PATH, exist_ok=True)

        client = chromadb.PersistentClient(path=settings.CHROMA_PATH)
        embedder = embedding_functions.DefaultEmbeddingFunction()

        qa_collection = client.get_or_create_collection(
            name="math_qa_ukr",
            embedding_function=embedder,
            metadata={"description": "Math QA database with Ukrainian results"},
        )

        # Sanity check on the returned handle.
        if not qa_collection:
            raise Exception("Failed to create or get collection")

        return qa_collection, embedder

    except Exception as exc:
        failure = f"ChromaDB initialization failed: {exc!s}"
        print(failure)
        raise Exception(failure)

def generate_document_id(question: str) -> str:
    """Return the hexadecimal MD5 digest of *question*, used as its document ID."""
    digest = hashlib.md5()
    digest.update(question.encode())
    return digest.hexdigest()

def save_images_locally(images: List[tuple], doc_id: str, config: Config) -> List[Dict[str, str]]:
    """Write each image to the storage directory and describe what was saved.

    Args:
        images: ``(image, title)`` pairs; each image must offer ``save(path)``.
        doc_id: Identifier used as the file-name prefix.
        config: Settings object providing ``STORAGE_DIR``.

    Returns:
        One ``{"path": ..., "description": ...}`` dict per image, in input order.
    """
    saved = []

    for index, image_and_title in enumerate(images):
        picture, caption = image_and_title
        # File name pattern: <doc_id>_plot<index>.png inside the storage directory.
        target = f"{config.STORAGE_DIR}/{doc_id}_plot{index}.png"
        picture.save(target)
        saved.append(dict(path=target, description=caption))

    return saved

def build_response(images: List[tuple], result: str, reasoning: str) -> str:
    """Assemble a Markdown/HTML answer from the result, explanation and images.

    Args:
        images: ``(image, title)`` pairs; may be empty or ``None``.
        result: Final answer text.
        reasoning: Step-by-step explanation text.

    Returns:
        The result and explanation sections, followed (when images are given)
        by one titled, base64-embedded ``<img>`` tag per image. Every image is
        rescaled to a width of 800 pixels with its aspect ratio preserved.
    """
    parts = [f"**Результат:** {result}\n\n**Пояснення:**\n{reasoning}\n\n"]

    if images:
        parts.append("\n**Візуалізації:**\n")
        target_width = 800
        for picture, caption in images:
            original_width, original_height = picture.size

            # Derive the height that keeps the original proportions.
            ratio = original_width / original_height
            target_height = int(target_width / ratio)

            scaled = picture.resize((target_width, target_height), Image.Resampling.LANCZOS)

            # Serialise the scaled picture as PNG and embed it as a data URI.
            png_buffer = BytesIO()
            scaled.save(png_buffer, format="PNG")
            encoded = base64.b64encode(png_buffer.getvalue()).decode()

            parts.append(f"\n**{caption}**\n")
            parts.append(
                f'<img src="data:image/png;base64,{encoded}" width="{target_width}" height="{target_height}" style="object-fit: contain">'
            )

    return "".join(parts)


def test_wolfram_api_key(api_key: str) -> bool:
    """Report whether *api_key* can answer a trivial Wolfram Alpha query.

    Sends the query "2+2" and requires at least one result; any exception
    along the way counts as an invalid key.
    """
    try:
        probe = wolframalpha.Client(api_key).query("2+2")
        next(probe.results)
    except Exception:
        return False
    return True

def query_wolfram_alpha(question: str, api_key: str, translator) -> Tuple[List[tuple], str]:
    """Query Wolfram Alpha in English and return images plus a translated result.

    The question is translated with ``translator.to_english`` before the
    query; pod titles and the textual result are translated back with
    ``translator.to_ukrainian``.

    Args:
        question: The user's question.
        api_key: Wolfram Alpha application ID.
        translator: Object providing ``to_english`` and ``to_ukrainian``.

    Returns:
        ``(images, result)`` where ``images`` is a list of
        ``(PIL image, translated pod title)`` pairs and ``result`` is the
        translated primary result, or a fixed "not available" message when
        Wolfram Alpha returned no textual result.

    Raises:
        ValueError: when the error text indicates an invalid API key.
        Exception: for any other failure of the query itself.
    """
    try:
        # Translate question to English for Wolfram Alpha
        question_en = translator.to_english(question)

        client = wolframalpha.Client(api_key)
        res = client.query(question_en.strip(), params=(("format", "image,plaintext"),))

        images = []
        for pod in res.pods:
            # Translated lazily so pods without images cost no translation call.
            pod_title_ukr = None
            for subpod in pod.subpods:
                if not hasattr(subpod, "img"):
                    continue
                try:
                    # Bounded wait and explicit status check: a stalled or
                    # failed download must not hang or abort the whole query.
                    response = requests.get(subpod.img.src, timeout=30)
                    response.raise_for_status()
                    img = Image.open(BytesIO(response.content))
                except (requests.RequestException, OSError) as image_error:
                    # One unusable image is skipped; the remaining pods and
                    # the textual result are still returned.
                    print(f"Skipping Wolfram Alpha image: {image_error}")
                    continue
                if pod_title_ukr is None:
                    pod_title_ukr = translator.to_ukrainian(pod.title)
                images.append((img, pod_title_ukr))

        try:
            result = next(res.results).text
            # Translate result back to Ukrainian
            result_ukr = translator.to_ukrainian(result)
            return images, result_ukr
        except (StopIteration, AttributeError):
            return images, "Текстовий результат недоступний"

    except Exception as e:
        error_msg = str(e)
        if "Invalid appid" in error_msg:
            raise ValueError("Недійсний ключ API Wolfram Alpha") from e
        raise Exception(f"Помилка запиту Wolfram Alpha: {error_msg}") from e

def solve_math_with_reasoning(
    question: str,
    translator: Translator,
    timeout: Optional[float] = None,
) -> Tuple[str, str]:
    """Solve a math problem with the local Ollama model instead of Wolfram Alpha.

    The question is translated to English, sent to the local
    ``/api/generate`` endpoint, and the ``RESULT|REASONING`` answer is split
    and translated back.

    Args:
        question: The user's question.
        translator: Translator used for both directions.
        timeout: Optional timeout in seconds for the HTTP request; ``None``
            (the default) waits indefinitely, as before.

    Returns:
        ``(result, reasoning)``, both translated back via the translator.

    Raises:
        ValueError: when the model answers ``NOT_MATH`` or the answer is not
            in ``RESULT|REASONING`` form.
        RuntimeError: when the endpoint responds with a non-200 status.
        Exception: any other error is printed and re-raised unchanged.
    """
    question_en = translator.to_english(question)
    
    prompt = f"""You are a math tutor. 
    Question: {question_en}

    Please solve this problem step by step:
    1. Identify the type of problem (percentage, basic arithmetic, etc.)
    2. Extract the relevant numbers and information
    3. Choose the appropriate formula or method
    4. Perform the calculations
    5. Provide the final answer
    
    If this is not a mathematical problem or cannot be solved with basic math, return "NOT_MATH".
    Format your response as: RESULT|REASONING
    Example: "15.0|1. This is a percentage increase problem..."
    """

    try:
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": "cow/gemma2_tools:9b",
                "prompt": prompt,
                # Sampling parameters belong under "options" in the Ollama API.
                "options": {"temperature": 0.7},
                "stream": False,
            },
            timeout=timeout,
        )

        # Fail loudly instead of falling through and returning None, which
        # would break the (result, reasoning) contract.
        if response.status_code != 200:
            raise RuntimeError(f"Ollama request failed with HTTP {response.status_code}")

        # Models commonly pad their output with whitespace or newlines.
        answer = response.json()["response"].strip()

        result, separator, reasoning = answer.partition("|")
        if "NOT_MATH" in result:
            raise ValueError("Not a mathematical problem")
        if not separator:
            raise ValueError("Model response is not in RESULT|REASONING format")

        # Translate back to Ukrainian
        result_ukr = translator.to_ukrainian(result.strip())
        reasoning_ukr = translator.to_ukrainian(reasoning.strip())

        return result_ukr, reasoning_ukr

    except Exception as e:
        print(f"Error in local reasoning: {str(e)}")
        raise
    
def generate_reasoning(
    question: str,
    wolfram_result: str,
    translator,
    timeout: Optional[float] = None,
) -> str:
    """Generate a step-by-step explanation of a Wolfram Alpha result.

    Question and result are translated to English, the local Ollama model is
    asked to explain the solution, and the explanation is translated back.
    The prompt tells the model to answer ``123123123`` for non-math
    questions; that marker is passed through to the caller untouched.

    Args:
        question: The user's question.
        wolfram_result: Result text obtained from Wolfram Alpha.
        translator: Object providing ``to_english`` and ``to_ukrainian``.
        timeout: Optional timeout in seconds for the HTTP request; ``None``
            (the default) waits indefinitely, as before.

    Returns:
        The translated explanation, or a Ukrainian error message when the
        request fails, times out, or returns a non-200 status. This function
        never raises.
    """
    question_en = translator.to_english(question)
    result_en = translator.to_english(wolfram_result)
    
    prompt = f"""You are a math tutor.
    Question: {question_en}
Wolfram Alpha Result: {result_en}

Explain the solution step by step:
1. Write out the relevant formulas used
2. Show each calculation step
3. Explain why this approach works
4. If the question not about math (like distance from to) - return 123123123.

Focus on mathematical reasoning only.Be concise. """

    try:
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": "cow/gemma2_tools:9b",
                "prompt": prompt,
                # Sampling parameters belong under "options" in the Ollama API.
                "options": {"temperature": 0.7},
                "stream": False,
            },
            # Without a timeout the requests.Timeout handler below can never fire.
            timeout=timeout,
        )

        if response.status_code != 200:
            return "Помилка генерації пояснення"

        explanation_en = response.json()["response"]
        return translator.to_ukrainian(explanation_en)

    except requests.Timeout:
        return "Час очікування генерації пояснення вийшов"
    except Exception as e:
        return f"Помилка генерації пояснення: {str(e)}"

def search_existing_answer(
    question: str,
    collection: chromadb.Collection,
    embedding_function,
    translator,
    config
) -> Optional[Dict[str, Any]]:
    """Look up the closest stored question and return its answer if close enough.

    Args:
        question: The user's question; it is translated with
            ``translator.to_english`` before being embedded.
        collection: Collection that is queried for the single nearest entry.
        embedding_function: Callable turning a list of texts into embeddings.
        translator: Object providing ``to_english``.
        config: Settings object providing ``SIMILARITY_THRESHOLD``.

    Returns:
        The stored document (parsed from JSON) with an added ``"similarity"``
        key, or ``None`` when nothing is stored, the best match is below the
        threshold, the document cannot be parsed, or any error occurs.
    """
    try:
        english_question = translator.to_english(question)

        hits = collection.query(
            query_embeddings=embedding_function([english_question]),
            n_results=1,
            include=["documents", "metadatas", "distances"],
        )

        # Each of these result columns must exist and hold a non-empty first row.
        for field in ("ids", "distances", "documents"):
            column = hits[field]
            if not column or not column[0]:
                return None

        distance = float(hits["distances"][0][0])

        # Map the reported distance onto a similarity score via 1 - d/2.
        # NOTE(review): the distance metric is not configured in this
        # function - confirm the formula matches the collection's metric.
        similarity = 1 - (distance / 2)

        if not similarity >= config.SIMILARITY_THRESHOLD:
            return None

        try:
            cached = json.loads(hits["documents"][0][0])
            cached["similarity"] = similarity
        except (json.JSONDecodeError, IndexError) as parse_error:
            print(f"Error parsing document JSON: {parse_error!s}")
            return None
        return cached

    except Exception as search_error:
        # Degrade gracefully: a failed lookup is treated as a cache miss.
        print(f"Detailed error in similarity search: {search_error!s}")
        return None
    
def format_qa_document(
    question: str,
    answer: str,
    reasoning: str,
    images: List[tuple],
    doc_id: str,
    translator,
    config
) -> Dict[str, Any]:
    """Build the storable record for one question/answer pair.

    The images are written to disk first (via ``save_images_locally``); the
    record then carries the answer text, the reasoning text, the saved image
    paths with their descriptions, and metadata containing the category, an
    ISO timestamp, the original question and its ``to_english`` translation.
    """
    stored_images = save_images_locally(images, doc_id, config)

    image_section = {
        "paths": [entry["path"] for entry in stored_images],
        "descriptions": [entry["description"] for entry in stored_images],
    }
    metadata_section = {
        "category": "math",
        "timestamp": datetime.now().isoformat(),
        "question": question,
        "question_en": translator.to_english(question),
    }

    return {
        "answer": {"text": answer},
        "reasoning": {"text": reasoning},
        "images": image_section,
        "metadata": metadata_section,
    }

@tool
def process_math_query(question: str, wolfram_api_key: str) -> Dict[str, Any]:
    """Process mathematical questions using Wolfram Alpha or local reasoning.
    
    Args:
        question: Mathematical question in Ukrainian
        wolfram_api_key: Valid Wolfram Alpha API key
        
    Returns:
        Dict containing result, reasoning, and images
    """
    # The docstring above is kept verbatim: the @tool decorator exposes it to
    # the model as this tool's description.
    settings = Config()
    translator = Translator(settings.SOURCE_LANG, settings.TARGET_LANG)
    collection, embedding_function = setup_chromadb()

    # 1) Reuse a stored answer when a sufficiently similar question exists.
    cache_hit = search_existing_answer(
        question, collection, embedding_function, translator, settings
    )
    if cache_hit:
        return {
            "result": cache_hit["answer"]["text"],
            "reasoning": cache_hit["reasoning"]["text"],
            "images": cache_hit["images"],
            "cached": True,
            "similarity": cache_hit.get("similarity", 0),
        }

    # 2) Ask Wolfram Alpha; fall back to the local model when that fails.
    try:
        images, result = query_wolfram_alpha(question, wolfram_api_key, translator)
        reasoning = generate_reasoning(question, result, translator)
    except Exception as wolfram_error:
        try:
            result, reasoning = solve_math_with_reasoning(question, translator)
        except Exception:
            # Both routes failed: surface the original Wolfram Alpha error.
            raise wolfram_error
        # Locally solved problems come without images.
        images = []

    # 3) Persist the fresh answer so later similar questions hit the cache.
    doc_id = generate_document_id(question)
    record = format_qa_document(
        question=question,
        answer=result,
        reasoning=reasoning,
        images=images,
        doc_id=doc_id,
        translator=translator,
        config=settings,
    )
    record_metadata = record["metadata"]

    collection.add(
        ids=[doc_id],
        documents=[json.dumps(record)],
        metadatas=[record_metadata],
        embeddings=embedding_function([record_metadata["question_en"]]),
    )

    return {
        "result": result,
        "reasoning": reasoning,
        "images": record["images"],
        "cached": False,
    }

@tool
def request_clarification(query_type: str) -> str:
    """Request clarification for ambiguous queries.
    
    Args:
        query_type: Type of query needing clarification ('math')
        
    Returns:
        str: Clarification request message in Ukrainian
    """
    # The docstring above is kept verbatim: the @tool decorator exposes it to
    # the model as this tool's description.
    generic_prompt = "Будь ласка, надайте більше деталей щодо вашого питання."
    prompts_by_type = {
        "math": "Будь ласка, уточніть ваше математичне питання. Наприклад: розв'яжи рівняння, обчисли вираз, тощо."
    }

    # Unknown query types fall back to the generic request for details.
    if query_type in prompts_by_type:
        return prompts_by_type[query_type]
    return generic_prompt

# Turn on LangChain's global debug flag for the whole process.
# NOTE(review): debug output may include tool-call arguments, and the Wolfram
# Alpha key is injected into those arguments in process_query — confirm this
# is acceptable outside local development.
set_debug(True)

# Initialize session state
# chat_history holds the messages rendered in the UI; each entry is a dict
# with "role", "content" and "images" keys. It starts with a greeting.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = [
        {
            "role": "assistant",
            "content": "Запитайте мене про математику чи науку!",
            "images": None
        }
    ]

# messages is the conversation passed to the model in process_query; it is
# seeded with the system prompt that names the two available tools.
if "messages" not in st.session_state:
    st.session_state.messages = [
        SystemMessage(content="""
        You are a mathematical and scientific assistant that can process questions in Ukrainian.
        You have access to these tools:
        1. process_math_query: Use for ANY mathematical or scientific questions
        2. request_clarification: Use when the question is ambiguous
        
        Always maintain a friendly and helpful tone. 
        """)
    ]

# Tool definitions
# Maps the tool name reported in a model tool call to the tool object to run.
tools_list = {
    "process_math_query": process_math_query,
    "request_clarification": request_clarification,
}

def validate_input(question: str) -> tuple[bool, str]:
    """Check a user question and return ``(is_valid, error_message)``.

    A question is rejected when it is empty or whitespace-only, or when it
    is longer than 1000 characters; the message is empty for valid input.
    """
    problem = ""
    if question.strip() == "":
        problem = "Питання не може бути порожнім"
    elif len(question) > 1000:
        problem = "Питання занадто довге (максимум 1000 символів)"
    return (problem == "", problem)

def process_query(user_input: str, wolfram_api_key: str):
    """Send the user's message to the model and run any tools it requests.

    The message and the model's reply are appended to
    ``st.session_state.messages``. For each requested tool found in
    ``tools_list`` the tool is invoked; ``process_math_query`` additionally
    receives the Wolfram Alpha key and has its stored images loaded from disk.

    Returns:
        ``(content, images, result, reasoning)``: the model's text (or a
        fixed fallback prompt when it is empty), a list of
        ``(image, description)`` pairs or ``None``, and the math tool's
        result and reasoning (``None`` when the math tool did not run).
    """
    chat_model = ChatOllama(model="cow/gemma2_tools:9b")
    tool_aware_model = chat_model.bind_tools(list(tools_list.values()))

    conversation = st.session_state.messages
    conversation.append(HumanMessage(content=user_input))
    ai_response = tool_aware_model.invoke(conversation)
    conversation.append(ai_response)

    images = None
    result = None
    reasoning = None

    for tool_call in getattr(ai_response, "tool_calls", None) or []:
        tool_name = tool_call["name"].lower()
        selected_tool = tools_list.get(tool_name)
        if not selected_tool:
            # The model asked for a tool that is not registered; ignore it.
            continue

        is_math_tool = tool_name == "process_math_query"
        if is_math_tool:
            # The key comes from the UI, not from the model's arguments.
            tool_call["args"]["wolfram_api_key"] = wolfram_api_key

        try:
            tool_response = selected_tool.invoke(tool_call["args"])

            if is_math_tool and isinstance(tool_response, dict):
                result = tool_response.get("result", "Не вдалося отримати результат")
                reasoning = tool_response.get("reasoning", "Пояснення недоступне")
                if "images" in tool_response:
                    stored = tool_response["images"]
                    # Only files that still exist on disk are loaded.
                    images = [
                        (Image.open(path), desc)
                        for path, desc in zip(
                            stored.get("paths", []),
                            stored.get("descriptions", []),
                        )
                        if os.path.exists(path)
                    ]
        except Exception as tool_error:
            result = f"Помилка виконання інструменту: {tool_error!s}"
            reasoning = "Виникла помилка при обробці запиту"
            images = None

    return ai_response.content or "Запитайтесь щось з матиматики", images, result, reasoning
# Set up Streamlit interface
st.set_page_config(page_title="Математичний та науковий помічник", layout="wide")
st.title("Математичний та науковий помічник")

# Sidebar settings: Wolfram Alpha key entry plus two connectivity indicators.
with st.sidebar:
    st.title("Налаштування")
    wolfram_api_key = st.text_input("Ключ API Wolfram Alpha", type="password")

    # Validate the key as soon as one has been entered.
    if wolfram_api_key:
        if test_wolfram_api_key(wolfram_api_key):
            st.success("✅ Дійсний ключ API")
        else:
            st.error("❌ Недійсний ключ API")

    # Probe the local Ollama server. The timeout keeps the page from hanging
    # when the port accepts connections but never answers, and only
    # request-level failures are treated as "Ollama not found".
    try:
        ollama_check = requests.get("http://localhost:11434/api/tags", timeout=5)
    except requests.RequestException:
        st.error("❌ Ollama не знайдено")
    else:
        if ollama_check.status_code == 200:
            st.success("✅ Ollama підключено")
        else:
            st.error("❌ Помилка Ollama")

# Display chat history: replay every stored message with its images, if any.
for entry in st.session_state.chat_history:
    with st.chat_message(entry["role"]):
        st.write(entry["content"])
        for picture, caption in entry.get("images") or ():
            st.image(picture, caption=caption)

# Chat input: validate the question, run it through the model, show the answer.
if user_input := st.chat_input("Введіть ваше питання..."):
    # A Wolfram Alpha key is required before any question is processed.
    if not wolfram_api_key:
        st.error("Будь ласка, введіть ключ API Wolfram Alpha")
        st.stop()

    valid, error = validate_input(user_input)
    if not valid:
        st.error(error)
        st.stop()

    # Add user message to chat
    st.session_state.chat_history.append({
        "role": "user",
        "content": user_input,
        "images": None
    })

    with st.chat_message("user"):
        st.write(user_input)

    # Get AI response
    with st.chat_message("assistant"):
        with st.spinner("Думаю..."):
            try:
                response, images, result, reasoning = process_query(user_input, wolfram_api_key)

                if result is None:
                    # No math tool ran: show the model's own text.
                    shown_text = response
                    st.write(response)
                elif isinstance(result, str) and len(reasoning or "") <= 100:
                    # A very short reasoning (an error text or the model's
                    # non-math marker) is not worth showing; show the result.
                    shown_text = result
                    st.write(result)
                else:
                    shown_text = f"Пояснення\n\n{reasoning}"
                    st.write("Пояснення", reasoning)

                if images:
                    for img, desc in images:
                        st.image(img, caption=desc)

                # Store what was actually displayed so that the history
                # replayed on the next rerun matches what the user saw.
                st.session_state.chat_history.append({
                    "role": "assistant",
                    "content": shown_text,
                    "images": images if images else None
                })

            except Exception as e:
                st.error(f"Помилка: {str(e)}")

# Clear chat button: reset the visible history to the greeting and drop every
# model message except the system prompt, then rerun the script.
if st.sidebar.button("Очистити чат"):
    greeting = {
        "role": "assistant",
        "content": "Запитайте мене про математику чи науку!",
        "images": None,
    }
    st.session_state.chat_history = [greeting]

    system_prompt = st.session_state.messages[0]
    st.session_state.messages = [system_prompt]
    st.rerun()