Skip to content

Commit

Permalink
Merge branch 'main' into performance-improvements
Browse files (browse the repository at this point in the history)
  • Loading branch information
mad-cat-lon authored Jul 20, 2024
2 parents ec144cc + bbef6ae commit 8e0f169
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 11 deletions.
8 changes: 4 additions & 4 deletions core/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"You will not license, sell, or transfer your Account without our prior written approval."
```
4)
```"By submitting Your Content to the Services, you represent and warrant that you have all rights, power, and authority necessary to grant the rights to Your Content contained within these Terms. Because you alone are responsible for Your
```"By submitting Your Content to the Services, you represent and warrant that you have all rights, power, and authority necessary to grant the rights to Your Content contained within these Terms. Because you alone are responsible for Your
Content, you may expose yourself to liability if you post or share Content without all necessary rights."
```
{{
Expand All @@ -31,7 +31,7 @@
"answer": true
}}
Given the statement 'The cookies used only collect anonymous, aggregated data that cannot be linked to a unique identity', which text answers it fully?
Given the statement 'The cookies used only collect anonymous, aggregated data that cannot be linked to a unique identity', which text answers it fully?
1)
```
personalized, unique and relevant offering, as this is why users come to the
Expand Down Expand Up @@ -148,12 +148,13 @@ def format(self, **kwargs) -> str:
return prompt



class RAGQueryPromptTemplate(StringPromptTemplate, BaseModel):
"""
Custom prompt template that takes in the query (a TOSDR case like "This service can read your messages")
and formats the prompt template to provide the query and the 4 texts returned from the vector store
"""

def format(self, **kwargs) -> str:
prompt = RAG_PROMPT.format(
query=kwargs["query"],
Expand All @@ -163,4 +164,3 @@ def format(self, **kwargs) -> str:
result4=kwargs["results"][3],
)
return prompt

13 changes: 6 additions & 7 deletions core/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
load_dotenv()
print("Setting up vector store...")
# Handling vector store
# Initialize persistent client and collection
# Initialize persistent client and collection
embedding_function = SentenceTransformerEmbeddings(
model_name="all-MiniLM-L6-v2"
)
Expand Down Expand Up @@ -195,15 +195,15 @@ async def add_src_doc(src_doc: SourceDocument):
{src_doc.service} already exists in the database"
}
)

# Create Langchain Document object from our request
original_doc = Document(
page_content=src_doc.text,
metadata={
"service": src_doc.service,
"url": src_doc.url,
"name": src_doc.name
}
}
)
# Turn HTML of page into markdown
html2text = Html2TextTransformer()
Expand All @@ -219,7 +219,7 @@ async def add_src_doc(src_doc: SourceDocument):
headers_to_split_on=headers_to_split_on
)
split_by_headers = md_header_splitter.split_text(md_doc.page_content)

# Go through each markdown chunk and recursively split
recursive_char_splitter = RecursiveCharacterTextSplitter(
chunk_size=500,
Expand Down Expand Up @@ -250,7 +250,7 @@ async def scrape_src_doc(browser, url, service):
html = await page.content()
# Only get the domain without subdomain to avoid cases
# where the service would be "github.com" but source doc links
# are in "docs.github.com"
# are in "docs.github.com"
name = await page.title()
try:
src_doc = SourceDocument(
Expand Down Expand Up @@ -337,7 +337,7 @@ async def make_query(query: LLMQuery):
# print(query_response)
if len(query_response) < 4:
result["error"] = 0
extension_response["results"].append(result)
extension_response["results"].append(result)
continue
# For each returned text from the vector store, insert into prompt,
# send to model and parse response
Expand Down Expand Up @@ -410,4 +410,3 @@ async def make_query(query: LLMQuery):
except json.JSONDecodeError:
print("Error")
return extension_response

0 comments on commit 8e0f169

Please sign in to comment.