Skip to content

Commit 4ffd8b4

Browse files
authored
autopep8 action fixes
1 parent ec144cc commit 4ffd8b4

File tree

3 files changed

+15
-15
lines changed

3 files changed

+15
-15
lines changed

core/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ class URL(BaseModel):
99
class ScrapedURLs(BaseModel):
1010
urls: List[str]
1111
source_url: str
12-
13-
12+
13+
1414
class SourceDocument(BaseModel):
1515
service: str
1616
url: str

core/prompts.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<|system|>
66
You are an expert lawyer analyzing terms of service agreements for a website (called "service") Given a query statement and 4 pieces of text extracted from the service's documents, pick the number of the text that directly answers the query in its entirety. Output a valid JSON object containing the choice of text and concise reasoning. If none of the texts can explicitly answer the statement, return 0. If there is a text that answers the question, set the "answer" field to true. In all other cases, set it to false. DO NOT IMPLY ANYTHING NOT GIVEN IN THE TEXT.
77
8-
Here are some examples:
8+
Here are some examples:
99
1010
Given the statement "You sign away all moral rights", which of the following texts, if any, answer it fully?
1111
@@ -22,7 +22,7 @@
2222
"You will not license, sell, or transfer your Account without our prior written approval."
2323
```
2424
4)
25-
```"By submitting Your Content to the Services, you represent and warrant that you have all rights, power, and authority necessary to grant the rights to Your Content contained within these Terms. Because you alone are responsible for Your
25+
```"By submitting Your Content to the Services, you represent and warrant that you have all rights, power, and authority necessary to grant the rights to Your Content contained within these Terms. Because you alone are responsible for Your
2626
Content, you may expose yourself to liability if you post or share Content without all necessary rights."
2727
```
2828
{{
@@ -31,7 +31,7 @@
3131
"answer": true
3232
}}
3333
34-
Given the statement 'The cookies used only collect anonymous, aggregated data that cannot be linked to a unique identity', which text answers it fully?
34+
Given the statement 'The cookies used only collect anonymous, aggregated data that cannot be linked to a unique identity', which text answers it fully?
3535
1)
3636
```
3737
personalized, unique and relevant offering, as this is why users come to the
@@ -140,6 +140,7 @@ class DocClassifierPromptTemplate(StringPromptTemplate, BaseModel):
140140
Determine from the title and source domain of a document discovered by the linkFinder content script
141141
whether it is likely to be a terms and conditions document or not
142142
"""
143+
143144
def format(self, **kwargs) -> str:
144145
prompt = DOC_PROMPT.format(
145146
urls=kwargs["urls"],
@@ -153,7 +154,7 @@ class RAGQueryPromptTemplate(StringPromptTemplate, BaseModel):
153154
Custom prompt template that takes in the query (a TOSDR case like "This service can read your messages")
154155
and formats the prompt template to provide the query and the 4 texts returned from the vector store
155156
"""
156-
157+
157158
def format(self, **kwargs) -> str:
158159
prompt = RAG_PROMPT.format(
159160
query=kwargs["query"],
@@ -163,4 +164,3 @@ def format(self, **kwargs) -> str:
163164
result4=kwargs["results"][3],
164165
)
165166
return prompt
166-

core/server.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
load_dotenv()
4949
print("Setting up vector store...")
5050
# Handling vector store
51-
# Initialize persistent client and collection
51+
# Initialize persistent client and collection
5252
embedding_function = SentenceTransformerEmbeddings(
5353
model_name="all-MiniLM-L6-v2"
5454
)
@@ -195,15 +195,15 @@ async def add_src_doc(src_doc: SourceDocument):
195195
{src_doc.service} already exists in the database"
196196
}
197197
)
198-
198+
199199
# Create Langchain Document object from our request
200200
original_doc = Document(
201201
page_content=src_doc.text,
202202
metadata={
203203
"service": src_doc.service,
204204
"url": src_doc.url,
205205
"name": src_doc.name
206-
}
206+
}
207207
)
208208
# Turn HTML of page into markdown
209209
html2text = Html2TextTransformer()
@@ -219,7 +219,7 @@ async def add_src_doc(src_doc: SourceDocument):
219219
headers_to_split_on=headers_to_split_on
220220
)
221221
split_by_headers = md_header_splitter.split_text(md_doc.page_content)
222-
222+
223223
# Go through each markdown chunk and recursively split
224224
recursive_char_splitter = RecursiveCharacterTextSplitter(
225225
chunk_size=500,
@@ -250,7 +250,7 @@ async def scrape_src_doc(browser, url, service):
250250
html = await page.content()
251251
# Only get the domain without subdomain to avoid cases
252252
# where the service would be "github.com" but source doc links
253-
# are in "docs.github.com"
253+
# are in "docs.github.com"
254254
name = await page.title()
255255
try:
256256
src_doc = SourceDocument(
@@ -337,7 +337,7 @@ async def make_query(query: LLMQuery):
337337
# print(query_response)
338338
if len(query_response) < 4:
339339
result["error"] = 0
340-
extension_response["results"].append(result)
340+
extension_response["results"].append(result)
341341
continue
342342
# For each returned text from the vector store, insert into prompt,
343343
# send to model and parse response
@@ -405,9 +405,9 @@ async def make_query(query: LLMQuery):
405405
response = json.loads(llm_response)
406406
check = response["statement"]
407407
if check:
408-
# Only append it to results if the statement actually applies
408+
# Only append it to results if the statement actually
409+
# applies
409410
extension_response["results"].append(result)
410411
except json.JSONDecodeError:
411412
print("Error")
412413
return extension_response
413-

0 commit comments

Comments
 (0)