diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..45514ce
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+*.bin
+*.onnx
+*.log
+__pycache__/
+/chroma_db
+/vcvarsall.sh
+search_context.txt
diff --git a/README.md b/README.md
index 5f054c7..20e575d 100644
--- a/README.md
+++ b/README.md
@@ -105,19 +105,43 @@ Example: I converted [this article](https://www.theatlantic.com/newsletters/arch
 > The script will:
 > - Pull the SearxNG Docker image
-> - Create and activate a Python virtual environment
-> - **USER ACTION NEEDED** Set your `GOOGLE_API_KEY` (edit the script to use your real key). [Obtain your API key (Currently Gemini, OpenAI and ollama is supported)](https://ai.google.dev/gemini-api/docs/api-key) from your preferred LLM provider. (Only needed when google mode is set, else set in model_config.py)
-> - Start the SearxNG Docker container
+> - Install [infinity](https://github.com/michaelfeil/infinity) in a dedicated Python environment `infinity_env`
+> - Create a Python virtual environment for CoexistAI in `coexistaienv`
 > - Install Python dependencies
-> - Start the FastAPI server
 
-4. **That’s it!**
-   The FastAPI and MCP server will start automatically and you’re ready to go.
+4. **Run!**
+
+   Call `zsh quick_start.sh` or `bash quick_start.sh` to start the FastAPI and MCP servers.
 
 **Note:**
 - Make sure Docker, Python 3, and pip are installed on your system.
-- Edit quick_setup.sh to set your real `GOOGLE_API_KEY` before running (needed if using google models)
-- Windows users can use [WSL](https://docs.microsoft.com/en-us/windows/wsl/) or Git Bash to run the script, or follow manual setup steps.
+- Edit `quick_start.sh` to set your real `GOOGLE_API_KEY` before running (needed if using Google models)
+
+
+### Windows users
+
+On Windows:
+
+1. Run a Valkey container using the instructions found [here](https://github.com/valkey-io/valkey-py):
+
+```
+docker run -p 6379:6379 -it valkey/valkey:latest
+```
+
+2. Run a SearXNG instance following the instructions [here](https://www.tanyongsheng.com/note/setting-up-searxng-on-windows-localhost-your-private-customizable-search-engine/).
+Make sure to enable JSON output in SearXNG (otherwise every request will fail with a 403 Forbidden error).
+Check that the instance works by visiting [this link](http://localhost:8080/search?q=When%20was%20Napoleon%20born?&engines=google,brave&format=json) (you may need to change the port).
+
+3. In `model_config.py`, set `START_SEARXNG = 0` (since we are running our own instance) and set the correct port in `PORT_NUM_SEARXNG`.
+
+4. To install, run the following in Git Bash:
+```
+wget -q https://raw.githubusercontent.com/nathan818fr/vcvars-bash/refs/heads/main/vcvarsall.sh -O vcvarsall.sh
+eval "$(./vcvarsall.sh x64)"
+bash quick_setup.sh
+```
+
+5. To start the servers, run `bash quick_start.sh` in Git Bash.
 
 ---
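A quick way to confirm that step 2 of the Windows instructions above actually enabled JSON output (a minimal sketch in Python; it assumes the SearXNG instance listens on localhost:8080, as in the link above):

```
import requests

# Smoke-test the SearXNG JSON API; a 403 here usually means
# JSON output is not enabled in the SearXNG settings.
resp = requests.get(
    "http://localhost:8080/search",
    params={"q": "When was Napoleon born?", "engines": "google,brave", "format": "json"},
    timeout=10,
)
resp.raise_for_status()
print(resp.json()["results"][:2])  # print the first couple of hits
```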
- "llm_kwargs" (dict): Additional keyword arguments for LLM initialization. - "temperature" (float): Sampling temperature for generation. @@ -23,73 +23,72 @@ - "cross_encoder_name" (str): Name of the cross-encoder model for reranking. """ ############## PORT and HOST SETTINGS -PORT_NUM_SEARXNG = 8085 +PORT_NUM_SEARXNG = 8080 PORT_NUM_APP = 8000 HOST_APP = "localhost" HOST_SEARXNG = "localhost" +START_SEARXNG = 0 ############### ## USER INPUTS NEEDED -#for open source model you can replace it by 'DUMMY' (for both llm and embed), else respective providers -llm_api_key = os.environ.get('GOOGLE_API_KEY', 'DUMMY') # either paste llm key, based on provider (for an instance, Google) here directly or export it in the env, else dummy for local -embed_api_key = os.environ.get('GOOGLE_API_KEY', 'DUMMY') # either paste embeder key, based on provider (for an instance, Google) here directly or export it in the env, else dummy for local +# for open source model you can replace it by 'DUMMY' (for both llm and embed), else respective providers +llm_api_key = os.environ.get( + "GOOGLE_API_KEY", "DUMMY" +) # either paste llm key, based on provider (for an instance, Google) here directly or export it in the env, else dummy for local +embed_api_key = os.environ.get( + "GOOGLE_API_KEY", "DUMMY" +) # either paste embeder key, based on provider (for an instance, Google) here directly or export it in the env, else dummy for local model_config = { # Name of the LLM model to use. For local models, use the model name served by your local server. - "llm_model_name": "gemini-2.0-flash", - - # LLM provider type: choose from 'google', 'local', 'groq', or 'openai' or 'others' - # in case of 'others' (base url needs to be updated in openai_compatible given below accordingly). + "llm_model_name": "yasserrmd/jan-nano-4b", + # LLM provider type: choose from 'google', 'local', 'groq', or 'openai' or 'others' + # in case of 'others' (base url needs to be updated in the `openai_compatible` dictionary below). # Make sure to update the api_key variable above to match the provider. # "local" is for lmstudio, for ollama and other local models use "others" with base_url updated in openai_compatible. # You can generally find it by "googling YOUR PROVIDER (example ollama) name openai api base compatible url" - "llm_type": "google", - + "llm_type": "others", # List of tools or plugins to use with the LLM, if any. Set to None if not used. "llm_tools": None, - # Additional keyword arguments for LLM initialization. "llm_kwargs": { "temperature": 0.1, # Sampling temperature for generation. "max_tokens": None, # Maximum number of tokens to generate (None for default). - "timeout": None, # Timeout for API requests (None for default). - "max_retries": 2, # Maximum number of retries for failed requests. + "timeout": None, # Timeout for API requests (None for default). + "max_retries": 2, # Maximum number of retries for failed requests. "api_key": llm_api_key, # API key for authentication. }, - # Name of the embedding model to use. # For Google, use their embedding model names. For local/HuggingFace, use the model path or name. 
- "embedding_model_name": "models/embedding-001", - - "embed_kwargs":{"google_api_key": embed_api_key}, #optional additional kwargs for embedding model initialization - + # Tested models can be found at https://github.com/michaelfeil/infinity?tab=readme-ov-file#supported-tasks-and-models-by-infinity + "embedding_model_name": "mixedbread-ai/mxbai-embed-large-v1", + "embed_kwargs": {}, # optional additional kwargs for embedding model initialization # Embedding backend: 'google' for Google, 'infinity_emb' for local/HuggingFace models. - "embed_mode": "google", - + "embed_mode": "infinity_emb", # Name of the cross-encoder model for reranking, typically a HuggingFace model. - "cross_encoder_name": "BAAI/bge-reranker-base" + "cross_encoder_name": "BAAI/bge-reranker-base", } # NO CHANGE NEEDED UNLESS PROVIDER CHANGES THE BASE URLS, OR YOU WANT TO USE DIFFERENT PROVIDER UNDER "others" openai_compatible = { - 'google': "https://generativelanguage.googleapis.com/v1beta/openai/", - 'local': "http://127.0.0.1:1234/v1", - 'groq': 'https://api.groq.com/openai/v1', - 'openai':'https://api.openai.com/v1', - 'others': 'https://openrouter.ai/api/v1' # for an example I have added here the openrouter api, since its openai compatible + "google": "https://generativelanguage.googleapis.com/v1beta/openai/", + "local": "http://127.0.0.1:1234/v1", + "groq": "https://api.groq.com/openai/v1", + "openai": "https://api.openai.com/v1", + "others": "http://localhost:11434/v1", # Ollama default port } -#####IF YOU WANT TO GO ALL LOCAL +#####IF YOU WANT TO GO ALL LOCAL # model_config = { # # Name of the LLM model to use. For local models, use the model name served by your local server. # "llm_model_name": "google/gemma-3-12b", -# # LLM provider type: choose from 'google', 'local', 'groq', or 'openai' or 'others' +# # LLM provider type: choose from 'google', 'local', 'groq', or 'openai' or 'others' # # in case of 'others' (base url needs to be updated in openai_compatible given below accordingly). # # Make sure to update the api_key variable above to match the provider. -# "llm_type": "local", +# "llm_type": "local", # # List of tools or plugins to use with the LLM, if any. Set to None if not used. # "llm_tools": None, @@ -114,4 +113,4 @@ # # Name of the cross-encoder model for reranking, typically a HuggingFace model. # "cross_encoder_name": "BAAI/bge-reranker-base" -# } \ No newline at end of file +# } diff --git a/quick_setup.sh b/quick_setup.sh index e1dfbc6..d30852a 100644 --- a/quick_setup.sh +++ b/quick_setup.sh @@ -1,62 +1,80 @@ -#!/bin/zsh -# Quick Shell Setup for CoexistAI (macOS/zsh) - -echo "Pulling SearxNG Docker image..." -docker pull searxng/searxng +#!/bin/sh +# Quick Shell Setup for CoexistAI (linux/sh) +# Install infinity in its own virtual environment echo "📚 Installing infinity_emb in separate environment..." -echo "Creating and activating Python virtual environment..." -python3.13 -m venv infinity_env -source infinity_env/bin/activate -pip install 'infinity_emb[all]' -pip install --upgrade "transformers<4.49" -pip install --upgrade "typer==0.19.1" "click>=8.1.3" -deactivate -echo "✅ Infinity environment setup complete" -# (Optional) Create and activate a Python virtual environment -echo "Creating and activating Python virtual environment..." 
-python3.13 -m venv coexistaienv
-source coexistaienv/bin/activate
+echo "Remove old infinity virtual environment"
+rm -rf infinity_env
 
-pip install 'markitdown[all]'
+echo "Creating infinity virtual environment"
+python3 -m venv infinity_env
 
-# You can neglect this if you dont want to use google models (either llm or embedding)
-echo "Setting GOOGLE_API_KEY, add any other keys which you want to store in environment (edit this script to use your real key)"
-export GOOGLE_API_KEY=REPLACE_YOUR_API_KEY_HERE_WITHOUT_QUOTES_AND_SPACES
+echo "Activating infinity virtual environment"
+if [ -d infinity_env/Scripts ]; then
+    ENV_DIR=infinity_env/Scripts
+else
+    ENV_DIR=infinity_env/bin
+fi
+source "$ENV_DIR/activate"
+if [ $? -ne 0 ]; then
+    echo "Error activating infinity environment"
+    exit 1
+fi
+echo "Installing infinity"
+python3 -m pip install 'infinity_emb[all]'
+if [ $? -ne 0 ]; then
+    echo "Error installing infinity in the environment"
+    exit 1
+fi
+python -m pip install --upgrade "transformers<4.49"
+python -m pip install --upgrade "typer==0.19.1" "click>=8.1.3"
 
-# Spin up the SearxNG Docker container
-echo "Starting SearxNG Docker container..."
-PORT_NUM_SEARXNG=$(python3.13 -c "from model_config import PORT_NUM_SEARXNG; print(PORT_NUM_SEARXNG)")
-HOST_SEARXNG=$(python3.13 -c "from model_config import HOST_SEARXNG; print(HOST_SEARXNG)")
+echo "Deactivating infinity virtual environment"
+deactivate
+echo "✅ Infinity environment setup complete"
 
-# Stop and remove existing searxng container if it exists
-if [ "$(docker ps -aq -f name=searxng)" ]; then
-    echo "Stopping and removing existing SearxNG container..."
-    docker stop searxng 2>/dev/null || true
-    docker rm searxng 2>/dev/null || true
-fi
+# (Optional) Create and activate a Python virtual environment
+echo "Remove old coexistai environment"
+rm -rf coexistaienv
 
-# Start new SearxNG container
-docker run -d \
-    --name searxng \
-    -p ${PORT_NUM_SEARXNG}:8080 \
-    -v $(pwd)/searxng:/etc/searxng:rw \
-    -e SEARXNG_BASE_URL=http://${HOST_SEARXNG}:${PORT_NUM_SEARXNG}/ \
-    -e SEARXNG_PORT=${PORT_NUM_SEARXNG} \
-    -e SEARXNG_BIND_ADDRESS=${HOST_SEARXNG} \
-    --restart unless-stopped \
-    searxng/searxng:latest
+echo "Creating coexistai virtual environment..."
+python -m venv coexistaienv
 
-echo "SearxNG container started successfully!"
+echo "Activating coexistai virtual environment"
+if [ -d coexistaienv/Scripts ]; then
+    ENV_DIR=coexistaienv/Scripts
+else
+    ENV_DIR=coexistaienv/bin
+fi
+source "$ENV_DIR/activate"
+if [ $? -ne 0 ]; then
+    echo "Error activating coexistai virtual environment"
+    exit 1
+fi
 
 # Install Python dependencies
-echo "Installing Python dependencies..."
-pip install -r ./requirements.txt
+echo "Installing Python dependencies in coexistai virtual environment"
+python -m pip install -r ./requirements.txt
+
+# Pull the SearxNG image only if configured to manage SearXNG locally
+START_SEARXNG=$(python -c "from model_config import START_SEARXNG; print(START_SEARXNG)")
+if [ "$START_SEARXNG" = "0" ]; then
+    echo "Skipping SearxNG setup as per configuration"
+elif [ "$START_SEARXNG" = "1" ]; then
+    echo "Pulling SearxNG Docker image..."
+    docker pull searxng/searxng
+else
+    echo "Invalid value for START_SEARXNG in model_config.py. Use 0 or 1."
+    exit 1
+fi
+
+# Deactivate coexistai virtual environment
+echo "Deactivating coexistai virtual environment"
+deactivate
 
 # Adding tts files
-# Check if wget is installed
 # Check if wget or curl is installed
 if command -v wget &> /dev/null; then
     DOWNLOADER_CMD="wget"
@@ -82,12 +100,3 @@ if [ ! -f voices-v1.0.bin ]; then
 else
     echo "voices-v1.0.bin already exists, skipping download."
 fi
-
-
-# 8. Start the FastAPI app
-echo "Starting FastAPI app..."
-cd . || exit 1
-# Get port and host values from model_config
-PORT_NUM_APP=$(python3.13 -c "from model_config import PORT_NUM_APP; print(PORT_NUM_APP)")
-HOST_APP=$(python3.13 -c "from model_config import HOST_APP; print(HOST_APP)")
-uvicorn app:app --host ${HOST_APP} --port ${PORT_NUM_APP} --reload
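After `quick_setup.sh` completes, `utils/utils.py` (see the hunk further down) polls the local Infinity server before using it. A standalone check along the same lines (a minimal sketch; Infinity's default port 7997 and the `/health` route are assumptions):

```
import time

import requests

def wait_for_infinity(url="http://localhost:7997", max_wait=120):
    """Poll the Infinity server until it answers or the deadline passes."""
    deadline = time.time() + max_wait
    while time.time() < deadline:
        try:
            if requests.get(f"{url}/health", timeout=2).ok:
                return True
        except requests.ConnectionError:
            pass  # server not up yet; keep polling
        time.sleep(2)
    return False

if __name__ == "__main__":
    print("Infinity up:", wait_for_infinity())
```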
diff --git a/quick_start.sh b/quick_start.sh
new file mode 100644
index 0000000..5be8a82
--- /dev/null
+++ b/quick_start.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Quick Shell Startup for CoexistAI (Linux/macOS/Git Bash)
+
+# For Git Bash, uncomment:
+#export PATH=$(pwd)/infinity_env/Scripts:$PATH
+
+# (Optional) Activate a Python virtual environment
+echo "Activating Python virtual environment..."
+if [ -d coexistaienv/Scripts ]; then
+    ENV_DIR=coexistaienv/Scripts
+else
+    ENV_DIR=coexistaienv/bin
+fi
+source "$ENV_DIR/activate"
+if [ $? -ne 0 ]; then
+    echo "Error activating coexistai environment"
+    exit 1
+fi
+
+# You can skip this if you don't want to use Google models (either LLM or embedding)
+echo "Setting GOOGLE_API_KEY, add any other keys which you want to store in the environment (edit this script to use your real key)"
+export GOOGLE_API_KEY=REPLACE_YOUR_API_KEY_HERE_WITHOUT_QUOTES_AND_SPACES
+
+# Spin up the SearxNG Docker container
+START_SEARXNG=$(python -c "from model_config import START_SEARXNG; print(START_SEARXNG)")
+if [ "$START_SEARXNG" = "0" ]; then
+    echo "Skipping SearxNG startup as per configuration"
+elif [ "$START_SEARXNG" = "1" ]; then
+    echo "Starting SearxNG Docker container..."
+    PORT_NUM_SEARXNG=$(python -c "from model_config import PORT_NUM_SEARXNG; print(PORT_NUM_SEARXNG)")
+    HOST_SEARXNG=$(python -c "from model_config import HOST_SEARXNG; print(HOST_SEARXNG)")
+
+    # Stop and remove existing searxng container if it exists
+    if [ "$(docker ps -aq -f name=searxng)" ]; then
+        echo "Stopping and removing existing SearxNG container..."
+        docker stop searxng 2>/dev/null || true
+        docker rm searxng 2>/dev/null || true
+    fi
+
+    # Start new SearxNG container
+    docker run -d \
+        --name searxng \
+        -p ${PORT_NUM_SEARXNG}:8080 \
+        -v $(pwd)/searxng:/etc/searxng:rw \
+        -e SEARXNG_BASE_URL=http://${HOST_SEARXNG}:${PORT_NUM_SEARXNG}/ \
+        -e SEARXNG_PORT=${PORT_NUM_SEARXNG} \
+        -e SEARXNG_BIND_ADDRESS=${HOST_SEARXNG} \
+        --restart unless-stopped \
+        searxng/searxng:latest
+    echo "SearxNG container started successfully!"
+else
+    echo "Invalid value for START_SEARXNG in model_config.py. Use 0 or 1."
+    exit 1
+fi
+
+# Start the FastAPI app
+echo "Starting FastAPI app..."
+cd "$(dirname "$0")" || exit 1
+# Get port and host values from model_config
+PORT_NUM_APP=$(python -c "from model_config import PORT_NUM_APP; print(PORT_NUM_APP)")
+HOST_APP=$(python -c "from model_config import HOST_APP; print(HOST_APP)")
+python -m uvicorn app:app --host ${HOST_APP} --port ${PORT_NUM_APP} --reload
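Once `quick_start.sh` has launched uvicorn, the app can be smoke-tested from Python (a minimal sketch; `/docs` is FastAPI's default interactive-docs route, and host/port come from `model_config.py`):

```
import requests

from model_config import HOST_APP, PORT_NUM_APP

# FastAPI serves interactive API docs at /docs by default,
# so a 200 here means the CoexistAI app is up and answering.
resp = requests.get(f"http://{HOST_APP}:{PORT_NUM_APP}/docs", timeout=5)
print("CoexistAI app is up" if resp.ok else f"Unexpected status: {resp.status_code}")
```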
diff --git a/requirements.txt b/requirements.txt
index c74cf6c..d1f148c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -39,4 +39,5 @@ youtube_transcript_api>=1.1.0
 slowapi
 charset-normalizer
 kokoro-onnx
-soundfile
\ No newline at end of file
+soundfile
+markitdown[all]
\ No newline at end of file
diff --git a/utils/utils.py b/utils/utils.py
index 063e1d0..c2531e2 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -125,7 +125,7 @@ def load_model(model_name,
     hf_embeddings = None
     if _embed_mode == 'infinity_emb':
-        infinity_api_url = "http://0.0.0.0:7997"
+        infinity_api_url = "http://localhost:7997"
         # Check if the Infinity API server is running
         def _start_infinity_with_poll(model_name, infinity_api_url, max_wait=120):
             """
@@ -140,6 +140,9 @@ def _start_infinity_with_poll(model_name, infinity_api_url, max_wait=120):
             # Locate binary
             candidate = os.path.join(os.path.dirname(__file__), "..", "infinity_env", "bin", "infinity_emb")
+            if not os.path.exists(candidate):
+                # Fall back to the Windows venv layout (Scripts/ instead of bin/)
+                candidate = os.path.join(os.path.dirname(__file__), "..", "infinity_env", "Scripts", "infinity_emb.exe")
             if not os.path.exists(candidate):
                 # fallback to PATH
                 candidate = shutil.which("infinity_emb")
diff --git a/utils/websearch_utils.py b/utils/websearch_utils.py
index 7897093..f81ab97 100644
--- a/utils/websearch_utils.py
+++ b/utils/websearch_utils.py
@@ -75,6 +75,7 @@ def __init__(self, port, host="localhost"):
             port (int): The port number for Searx search service.
             host (str): The host address for Searx search service.
         """
+        logger.info(f"SearXNG at {host}:{port}")
         self.searcher = SearxSearchWrapper(searx_host=f"http://{host}:{port}")
 
     def query_search(self, query, engines=['google'], num_results=5):
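For context on the `websearch_utils.py` hunk: `SearxSearchWrapper` comes from LangChain and can be exercised directly against the local instance (a minimal sketch; the query and engine list are arbitrary examples):

```
from langchain_community.utilities import SearxSearchWrapper

from model_config import HOST_SEARXNG, PORT_NUM_SEARXNG

# Same construction as in websearch_utils.py above.
searcher = SearxSearchWrapper(searx_host=f"http://{HOST_SEARXNG}:{PORT_NUM_SEARXNG}")

# results() returns a list of dicts with title/link/snippet keys.
for hit in searcher.results("When was Napoleon born?", num_results=5, engines=["google"]):
    print(hit.get("title"), "->", hit.get("link"))
```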