autopep8 action fixes

mad-cat-lon · web-flow · commit 4ffd8b4b79bb · 2024-07-20T18:37:51.000Z
diff --git a/core/models.py b/core/models.py
@@ -9,8 +9,8 @@ class URL(BaseModel):
 class ScrapedURLs(BaseModel):
     urls: List[str]
     source_url: str
-    
-    
+
+
 class SourceDocument(BaseModel):
     service: str
     url: str
diff --git a/core/prompts.py b/core/prompts.py
@@ -5,7 +5,7 @@
 <|system|>
 You are an expert lawyer analyzing terms of service agreements for a website (called "service") Given a query statement and 4 pieces of text extracted from the service's documents, pick the number of the text that directly answers the query in its entirety. Output a valid JSON object containing the choice of text and concise reasoning. If none of the texts can explicitly answer the statement, return 0. If there is a text that answers the question, set the "answer" field to true. In all other cases, set it to false. DO NOT IMPLY ANYTHING NOT GIVEN IN THE TEXT.
 
-Here are some examples: 
+Here are some examples:
 
 Given the statement "You sign away all moral rights", which of the following texts, if any, answer it fully?
 
@@ -22,7 +22,7 @@
 "You will not license, sell, or transfer your Account without our prior written approval."
 ```
 4)
-```"By submitting Your Content to the Services, you represent and warrant that you have all rights, power, and authority necessary to grant the rights to Your Content contained within these Terms. Because you alone are responsible for Your 
+```"By submitting Your Content to the Services, you represent and warrant that you have all rights, power, and authority necessary to grant the rights to Your Content contained within these Terms. Because you alone are responsible for Your
 Content, you may expose yourself to liability if you post or share Content without all necessary rights."
 ```
 {{
@@ -31,7 +31,7 @@
     "answer": true
 }}
 
-Given the statement 'The cookies used only collect anonymous, aggregated data that cannot be linked to a unique identity', which text answers it fully? 
+Given the statement 'The cookies used only collect anonymous, aggregated data that cannot be linked to a unique identity', which text answers it fully?
 1)
 ```
 personalized, unique and relevant offering, as this is why users come to the
@@ -140,6 +140,7 @@ class DocClassifierPromptTemplate(StringPromptTemplate, BaseModel):
     Determine from the title and source domain of a document discovered by the linkFinder content script
     whether is is likely to be a terms and conditions document or not
     """
+
     def format(self, **kwargs) -> str:
         prompt = DOC_PROMPT.format(
             urls=kwargs["urls"],
@@ -153,7 +154,7 @@ class RAGQueryPromptTemplate(StringPromptTemplate, BaseModel):
     Custom prompt template that takes in the query (a TOSDR case like "This service can read your messages")
     and formats the prompt template to provide the query and the 4 texts returned from the vector store
     """
-    
+
     def format(self, **kwargs) -> str:
         prompt = RAG_PROMPT.format(
             query=kwargs["query"],
@@ -163,4 +164,3 @@ def format(self, **kwargs) -> str:
             result4=kwargs["results"][3],
         )
         return prompt
-    
diff --git a/core/server.py b/core/server.py
@@ -48,7 +48,7 @@
 load_dotenv()
 print("Setting up vector store...")
 # Handling vector store
-# Initialize persistent client and collection 
+# Initialize persistent client and collection
 embedding_function = SentenceTransformerEmbeddings(
     model_name="all-MiniLM-L6-v2"
 )
@@ -195,15 +195,15 @@ async def add_src_doc(src_doc: SourceDocument):
                 {src_doc.service} already exists in the database"
             }
         )
-    
+
     # Create Langchain Document object from our request
     original_doc = Document(
         page_content=src_doc.text,
         metadata={
             "service": src_doc.service,
             "url": src_doc.url,
             "name": src_doc.name
-        } 
+        }
     )
     # Turn HTML of page into markdown
     html2text = Html2TextTransformer()
@@ -219,7 +219,7 @@ async def add_src_doc(src_doc: SourceDocument):
         headers_to_split_on=headers_to_split_on
     )
     split_by_headers = md_header_splitter.split_text(md_doc.page_content)
-    
+
     # Go through each markdown chunk and recursively split
     recursive_char_splitter = RecursiveCharacterTextSplitter(
         chunk_size=500,
@@ -250,7 +250,7 @@ async def scrape_src_doc(browser, url, service):
         html = await page.content()
         # Only get the domain without subdomain to avoid cases
         # where the service would be "github.com" but source doc links
-        # are in "docs.github.com" 
+        # are in "docs.github.com"
         name = await page.title()
         try:
             src_doc = SourceDocument(
@@ -337,7 +337,7 @@ async def make_query(query: LLMQuery):
         # print(query_response)
         if len(query_response) < 4:
             result["error"] = 0
-            extension_response["results"].append(result)            
+            extension_response["results"].append(result)
             continue
         # For each returned text from the vector store, insert into prompt,
         # send to model and parse response
@@ -405,9 +405,9 @@ async def make_query(query: LLMQuery):
                 response = json.loads(llm_response)
                 check = response["statement"]
                 if check:
-                    # Only append it to results if the statement actually appleis
+                    # Only append it to results if the statement actually
+                    # applies
                     extension_response["results"].append(result)
             except json.JSONDecodeError:
                 print("Error")
     return extension_response
-