Skip to content

Commit

Permalink
fix: added runnables to create usable models from served endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
broomva committed May 12, 2024
1 parent 0197998 commit 08cb77c
Show file tree
Hide file tree
Showing 8 changed files with 351 additions and 22 deletions.
8 changes: 8 additions & 0 deletions arcan/ai/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
from typing import Any, Callable, Dict, List, Optional, Union

from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI, OpenAI
from pydantic import BaseModel

Expand Down Expand Up @@ -84,6 +85,13 @@ class LLMFactory:
os.getenv("OPENAI_API_BASE_URL", "https://api.together.xyz/v1"),
),
),
"ChatGroq": lambda **kwargs: ChatGroq(
temperature=kwargs.get("temperature", 0.3),
model_name=kwargs.get(
"model",
os.getenv("TOGETHER_MODEL_NAME", "llama3-8b-8192"),
),
),
}

@staticmethod
Expand Down
61 changes: 61 additions & 0 deletions arcan/ai/runnables/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#%%
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable
from langserve import RemoteRunnable


class RunnableFactory:
    """Create, and optionally cache, ``RemoteRunnable`` clients.

    Every runnable is addressed as ``<base_url>/<runnable_name>/``.
    """

    def __init__(self, base_url: str = "http://localhost:8000/"):
        """Remember the server base URL and start with an empty cache.

        Args:
            base_url: Root URL of the serving application. A trailing slash
                is optional.
        """
        self.base_url = base_url
        # Maps runnable name -> client instance created by ``get_runnable``.
        self.runnable_cache = {}

    def _endpoint_url(self, runnable_name: str) -> str:
        """Return the endpoint URL for *runnable_name*.

        Plain concatenation glued host and path together whenever
        ``base_url`` lacked a trailing slash (``http://host:8000openai/``),
        so both sides are normalised to exactly one slash at each join.
        """
        return f"{self.base_url.rstrip('/')}/{runnable_name.strip('/')}/"

    def get_runnable(self, runnable_name: str, cache: bool = True) -> "Runnable":
        """Return a ``RemoteRunnable`` for *runnable_name*.

        Args:
            runnable_name: Path segment under ``base_url`` that identifies
                the runnable.
            cache: When true, reuse a previously created client for the same
                name and store newly created ones. When false, always build
                a new client and leave the cache untouched.

        Returns:
            The cached or newly created client.
        """
        if cache and runnable_name in self.runnable_cache:
            return self.runnable_cache[runnable_name]

        runnable = RemoteRunnable(self._endpoint_url(runnable_name))
        if cache:
            self.runnable_cache[runnable_name] = runnable
        return runnable

class ArcanRunnables:
    """Named accessors for the remote runnables reachable under one base URL."""

    def __init__(self, base_url: str = "http://localhost:8000/"):
        """Create the ``RunnableFactory`` used for every lookup.

        Args:
            base_url: Root URL handed to the factory unchanged.
        """
        self.factory = RunnableFactory(base_url=base_url)

    def _lookup(self, name):
        """Fetch the runnable called *name* through the shared factory."""
        return self.factory.get_runnable(runnable_name=name)

    def get_chat_spells_agent_runnable(self):
        """Return the runnable registered as ``spells_agent``."""
        return self._lookup("spells_agent")

    def get_openai_runnable(self):
        """Return the runnable registered as ``openai``."""
        return self._lookup("openai")

    def get_groq_runnable(self):
        """Return the runnable registered as ``groq``."""
        return self._lookup("groq")

# %%



# from langchain.schema import HumanMessage, SystemMessage
# from langchain.schema.runnable import RunnableMap

# arcan_runnables = ArcanRunnables(base_url="http://localhost:8000/")
# chat_spells_agent = arcan_runnables.get_chat_spells_agent_runnable()
# openai_runnable = arcan_runnables.get_openai_runnable()
# groq_runnable = arcan_runnables.get_groq_runnable()


# prompt = ChatPromptTemplate.from_messages(
# [("system", "Tell me a long story about {topic}")]
# )

# # Can define custom chains
# chain = prompt | RunnableMap({
# "openai": openai_runnable,
# "groq": groq_runnable,
# })
# # %%

# chain.batch([{"topic": "parrots"}, {"topic": "cats"}])


# %%
9 changes: 8 additions & 1 deletion arcan/ai/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain_experimental.utilities import PythonREPL

from arcan.spells.scrapping import scrape_website, scrape_website_selenium
from arcan.spells.scrapping import (firecrawl_scrape, scrape_website,
scrape_website_selenium)
from arcan.spells.search import serper_api_search

load_dotenv()
Expand Down Expand Up @@ -51,6 +52,12 @@ def get_word_length(word: str) -> int:
description="Useful when you need to get data from a website url and the regular Scrape Website method is not working correctly; DO NOT make up any url, the url should only be from the search results. Prefer Tavily seach tool over this one unless explicitly asked to perform a scrapping task",
)

# Tool named "firecrawl" that delegates to `firecrawl_scrape`; the description
# string below is the text supplied to `Tool` for this entry.
firecrawl_tool = Tool(
name="firecrawl",
func=firecrawl_scrape,
description="Useful when you need to get data from a website url; DO NOT make up any url, use the one provided by the user.",
)

python_repl = PythonREPL()

repl_tool = Tool(
Expand Down
51 changes: 32 additions & 19 deletions arcan/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@

from dotenv import load_dotenv
from fastapi import Depends, FastAPI, Form, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from langchain_core.messages import AIMessage, FunctionMessage, HumanMessage
from langchain_core.runnables import Runnable
from langserve import add_routes
from langserve.pydantic_v1 import BaseModel, Field
from sqlalchemy.orm import Session

from arcan.ai.agents import ArcanSpellsAgent
from arcan.ai.llm import LLM
from arcan.api.datamodels import get_db, get_db_context
from arcan.api.session import ArcanSession, run_agent

Expand All @@ -26,11 +27,17 @@
app = FastAPI()


# @app.get("/")
# def default():
# return {
# "message": "Check out the API documentation at http://arcanai.tech/api/docs"
# }

# Enable CORS for every origin, method and request header, allow credentials,
# and expose every response header.
# NOTE(review): wildcard origins together with allow_credentials=True is a very
# permissive policy; the FastAPI CORS documentation advises listing origins
# explicitly when credentials are allowed — confirm this is intended outside
# local development.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
expose_headers=["*"],
)


@app.get("/")
async def redirect_root_to_docs():
Expand All @@ -42,13 +49,6 @@ async def index():
return {"message": "Arcan is Running!"}


# @app.get("/api/chat/{user_id}")
# async def api_user_chat(user_id: str, query: str, db: Session = Depends(get_db)):
# arcan_session = ArcanSession(db)
# response = run_agent(session=arcan_session, user_id=user_id, query=query)
# return {"response": response}


# @requires_auth
@app.get("/api/chat")
async def chat(user_id: str, query: str, db: Session = Depends(get_db)):
Expand All @@ -58,7 +58,6 @@ async def chat(user_id: str, query: str, db: Session = Depends(get_db)):

#%%


class Input(BaseModel):
input: str
chat_history: List[Union[HumanMessage, AIMessage, FunctionMessage]] = Field(
Expand All @@ -70,13 +69,27 @@ class Output(BaseModel):
output: Any


def get_runnable() -> Runnable:
    """Instantiate ``ArcanSpellsAgent`` and return its ``agent_executor``."""
    spells_agent = ArcanSpellsAgent()
    return spells_agent.agent_executor


# Serve the spells agent executor at /spells_agent with explicit input/output
# schemas, the run name "agent", and the feedback endpoint enabled.
# `runnable=` used to be passed twice in this call, which Python rejects as a
# repeated keyword argument; only the inline construction is kept.
add_routes(
    app=app,
    runnable=(
        ArcanSpellsAgent()
        .agent_executor.with_types(input_type=Input, output_type=Output)
        .with_config({"run_name": "agent"})
    ),
    path="/spells_agent",
    enable_feedback_endpoint=True,
)

# Expose each chat model as its own route. The tuple order matches the order
# in which the routes were registered before: /openai, /groq, /together.
for _provider, _route in (
    ('ChatOpenAI', "/openai"),
    ('ChatGroq', "/groq"),
    ('ChatTogetherAI', "/together"),
):
    add_routes(
        app,
        LLM(provider=_provider).llm,
        path=_route,
    )
95 changes: 95 additions & 0 deletions arcan/spells/scrapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
import html2text
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from firecrawl import FirecrawlApp
from langchain.agents import Tool
from langchain_community.tools import WikipediaQueryRun
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.utilities import WikipediaAPIWrapper
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

Expand Down Expand Up @@ -128,3 +134,92 @@ def url_text_scrapper(url: str):
file_path.write_text(scrapped_text)

return scrapped_text, clean_domain


def firecrawl_loader(url: str, mode: str = "scrape"):
    """Build a ``FireCrawlLoader`` for *url*; nothing is loaded here.

    Args:
        url: Address handed to the loader.
        mode: "scrape" scrapes the single url and returns the markdown;
            "crawl" crawls the url and all accessible sub pages and returns
            the markdown for each one.

    Returns:
        The configured loader instance.
    """
    # Imported inside the function so the dependency is only needed on use.
    from langchain_community.document_loaders import FireCrawlLoader

    firecrawl_key = os.environ.get("FIRECRAWL_API_KEY")
    return FireCrawlLoader(api_key=firecrawl_key, url=url, mode=mode)




def firecrawl_scrape(url):
    """Scrape *url* with ``FirecrawlApp`` using LLM extraction on the main content.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever ``FirecrawlApp.scrape_url`` returns for this request.
        NOTE(review): this was previously documented as a markdown string;
        confirm the actual return shape against the installed firecrawl client.
    """
    client = FirecrawlApp()

    # Ask the service to extract and summarise rather than return the raw page.
    extractor_options = {
        'mode': 'llm-extraction',
        'extractionPrompt': 'Extract the key elements, segment by NER, and summarize the content. Make sure the returned content is at most 16385 tokens',
    }
    page_options = {'onlyMainContent': True}

    request_params = {
        'extractorOptions': extractor_options,
        'pageOptions': page_options,
    }
    return client.scrape_url(url, request_params)



from pydantic import AnyHttpUrl


def scrapegraph_scrape(url: AnyHttpUrl, prompt: str):
    """Run a ``SmartScraperGraph`` over *url* and return what it produces.

    The graph is configured to use Ollama at ``http://localhost:11434`` with
    ``ollama/mistral`` as the LLM and ``ollama/nomic-embed-text`` for
    embeddings.

    Args:
        url: Page to scrape; converted to a plain string for the graph.
        prompt: Instruction given to the graph.

    Returns:
        The value returned by ``SmartScraperGraph.run()``. Earlier versions
        only printed it and returned ``None``.
    """
    # Imported inside the function so the dependency is only needed on use.
    from scrapegraphai.graphs import SmartScraperGraph

    graph_config = {
        "llm": {
            "model": "ollama/mistral",
            "temperature": 0,
            "format": "json",  # Ollama needs the format to be specified explicitly
            "base_url": "http://localhost:11434",  # set Ollama URL
        },
        "embeddings": {
            "model": "ollama/nomic-embed-text",
            "base_url": "http://localhost:11434",  # set Ollama URL
        },
        "verbose": True,
    }

    smart_scraper_graph = SmartScraperGraph(
        prompt=prompt,
        # also accepts a string with the already downloaded HTML code
        source=str(url),
        config=graph_config,
    )

    result = smart_scraper_graph.run()
    # The print is kept so existing callers still see the output on stdout.
    print(result)
    return result


from pydantic import FilePath


async def llama_parse_scrape(pdf_path: FilePath):
    """Parse the file at *pdf_path* with ``LlamaParse`` and return the documents.

    Args:
        pdf_path: Path of the file handed to ``LlamaParse.aload_data``.

    Returns:
        The documents produced by the awaited ``aload_data`` call.
    """
    import nest_asyncio

    # Applied before llama_parse is imported, in the same order as before.
    nest_asyncio.apply()

    from llama_parse import LlamaParse

    parser_settings = {
        "api_key": os.environ.get("LLAMA_CLOUD_API_KEY"),
        "result_type": "markdown",  # "markdown" and "text" are available
        "num_workers": 4,  # if multiple files passed, split in `num_workers` API calls
        "verbose": True,
        "language": "en",  # optional; the default is en
    }
    llama_parser = LlamaParse(**parser_settings)

    return await llama_parser.aload_data(pdf_path)
Loading

0 comments on commit 08cb77c

Please sign in to comment.