Skip to content

Commit 935952e

Browse files
authored
Update extract.py
1 parent 40d4278 commit 935952e

File tree

1 file changed

+4
-7
lines changed

1 file changed

+4
-7
lines changed

extract.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -30,18 +30,18 @@ def download_page(url, user_agent):
3030
def download_images(soup, base_url):
3131
print(Fore.BLUE + "Downloading images...")
3232
images = soup.find_all('img')
33-
downloaded_image_paths = [] # This list will store the paths of downloaded images.
33+
downloaded_image_paths = []
3434
for img in images:
3535
img_url = urljoin(base_url, img.get('src'))
3636
if not img_url:
37-
continue # Skip if img['src'] is None
37+
continue
3838
img_response = requests.get(img_url)
3939
img_name = os.path.basename(img_url)
4040
img_path = os.path.join(os.getcwd(), img_name)
4141
with open(img_path, 'wb') as f:
4242
f.write(img_response.content)
43-
downloaded_image_paths.append(img_path) # Add the path to the list
44-
return downloaded_image_paths # Return the list of downloaded image paths
43+
downloaded_image_paths.append(img_path)
44+
return downloaded_image_paths
4545

4646
def ocr_image(image_path):
4747
print(Fore.BLUE + f"Performing OCR on {image_path}...")
@@ -59,9 +59,6 @@ def parse_openai_response_to_iocs(text):
5959
"Indicator": parts[1].strip(),
6060
"Context": parts[2].strip(),
6161
})
62-
#else:
63-
# print(Fore.RED + "Response format incorrect, resubmitting request...")
64-
# return None
6562
return iocs
6663

6764
def extract_iocs_with_openai(content, context_identifier, retry_count=0, max_retries=1):

0 commit comments

Comments
 (0)