From 82c7ed97e0442c62530b6f2517f775b7013218f0 Mon Sep 17 00:00:00 2001 From: mad-cat-lon Date: Sat, 20 Jul 2024 00:12:53 +0000 Subject: [PATCH] autopep8 action fixes --- core/prompts.py | 12 ++++++------ core/server.py | 19 +++++++++---------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/core/prompts.py b/core/prompts.py index 43d5da8..4987348 100644 --- a/core/prompts.py +++ b/core/prompts.py @@ -3,8 +3,8 @@ PROMPT = """ <|system|> -You are an expert lawyer analyzing terms of service agreements. Given a statement about the service and 4 pieces of text extracted from its documents, pick the number of the text that directly answers the query in its entirety. Output a valid JSON object containing the choice of text and concise reasoning. If none of the texts can explicitly answer the statement, return 0. If there is a text that answers the question, set the "answer" field to true. In all other cases, set it to false. -Here are some examples: +You are an expert lawyer analyzing terms of service agreements. Given a statement about the service and 4 pieces of text extracted from its documents, pick the number of the text that directly answers the query in its entirety. Output a valid JSON object containing the choice of text and concise reasoning. If none of the texts can explicitly answer the statement, return 0. If there is a text that answers the question, set the "answer" field to true. In all other cases, set it to false. +Here are some examples: Given the statement "You sign away all moral rights", which of the following texts, if any, answer it fully? @@ -21,7 +21,7 @@ "You will not license, sell, or transfer your Account without our prior written approval." ``` 4) -```"By submitting Your Content to the Services, you represent and warrant that you have all rights, power, and authority necessary to grant the rights to Your Content contained within these Terms. Because you alone are responsible for Your +```"By submitting Your Content to the Services, you represent and warrant that you have all rights, power, and authority necessary to grant the rights to Your Content contained within these Terms. Because you alone are responsible for Your Content, you may expose yourself to liability if you post or share Content without all necessary rights." ``` {{ @@ -30,7 +30,7 @@ "answer": true }} -Given the statement 'The cookies used only collect anonymous, aggregated data that cannot be linked to a unique identity', which text answers it fully? +Given the statement 'The cookies used only collect anonymous, aggregated data that cannot be linked to a unique identity', which text answers it fully? 1) ``` personalized, unique and relevant offering, as this is why users come to the @@ -104,12 +104,13 @@ n_results = 4 + class RAGQueryPromptTemplate(StringPromptTemplate, BaseModel): """ Custom prompt template that takes in the query (a TOSDR case like "This service can read your messages") and formats the prompt template to provide the query and the 4 texts returned from the vector store """ - + def format(self, **kwargs) -> str: prompt = PROMPT.format( query=kwargs["query"], @@ -119,4 +120,3 @@ def format(self, **kwargs) -> str: result4=kwargs["results"][3], ) return prompt - diff --git a/core/server.py b/core/server.py index 8859b50..c8fc629 100644 --- a/core/server.py +++ b/core/server.py @@ -41,7 +41,7 @@ load_dotenv() print("Setting up vector store...") # Handling vector store -# Initialize persistent client and collection +# Initialize persistent client and collection embedding_function = SentenceTransformerEmbeddings( model_name="all-MiniLM-L6-v2" ) @@ -133,7 +133,7 @@ async def add_src_document(src_doc: SourceDocument): {src_doc.service} already exists in the database" } ) - + # Create Langchain Document object from our request original_doc = Document( page_content=src_doc.text, @@ -141,7 +141,7 @@ async def add_src_document(src_doc: SourceDocument): "service": src_doc.service, "url": src_doc.url, "name": src_doc.name - } + } ) # Turn HTML of page into markdown html2text = Html2TextTransformer() @@ -157,7 +157,7 @@ async def add_src_document(src_doc: SourceDocument): headers_to_split_on=headers_to_split_on ) split_by_headers = md_header_splitter.split_text(md_doc.page_content) - + # Go through each markdown chunk and recursively split recursive_char_splitter = RecursiveCharacterTextSplitter( chunk_size=500, @@ -188,7 +188,7 @@ async def scrape_raw_document_from_url(browser, url, service): html = await page.content() # Only get the domain without subdomain to avoid cases # where the service would be "github.com" but source doc links - # are in "docs.github.com" + # are in "docs.github.com" name = await page.title() src_doc = SourceDocument( service=service, @@ -248,7 +248,7 @@ async def make_query(query: LLMQuery): # print(query_response) if len(query_response) < 4: result["error"] = 0 - extension_response["results"].append(result) + extension_response["results"].append(result) continue # For each returned text from the vector store, insert into prompt, # send to model and parse response @@ -265,9 +265,9 @@ async def make_query(query: LLMQuery): query=q["text"], results=[doc.page_content for doc in query_response] ) - print("="*100) + print("=" * 100) print(prompt) - print("="*100) + print("=" * 100) llm_response = llm(prompt) print(llm_response) @@ -288,11 +288,10 @@ async def make_query(query: LLMQuery): if source_text: result["error"] = None else: - # Model chose 0 + # Model chose 0 result["error"] = 1 except json.JSONDecodeError: print("Error decoding response from model") result["error"] = 2 extension_response["results"].append(result) return extension_response -