@@ -30,18 +30,18 @@ def download_page(url, user_agent):
 def download_images(soup, base_url):
     print(Fore.BLUE + "Downloading images...")
     images = soup.find_all('img')
-    downloaded_image_paths = []  # This list will store the paths of downloaded images.
+    downloaded_image_paths = []
     for img in images:
         img_url = urljoin(base_url, img.get('src'))
         if not img_url:
-            continue  # Skip if img['src'] is None
+            continue
         img_response = requests.get(img_url)
         img_name = os.path.basename(img_url)
         img_path = os.path.join(os.getcwd(), img_name)
         with open(img_path, 'wb') as f:
             f.write(img_response.content)
-        downloaded_image_paths.append(img_path)  # Add the path to the list
-    return downloaded_image_paths  # Return the list of downloaded image paths
+        downloaded_image_paths.append(img_path)
+    return downloaded_image_paths

 def ocr_image(image_path):
     print(Fore.BLUE + f"Performing OCR on {image_path}...")
@@ -59,9 +59,6 @@ def parse_openai_response_to_iocs(text):
                 "Indicator": parts[1].strip(),
                 "Context": parts[2].strip(),
             })
-        #else:
-        #    print(Fore.RED + "Response format incorrect, resubmitting request...")
-        #    return None
     return iocs

 def extract_iocs_with_openai(content, context_identifier, retry_count=0, max_retries=1):
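For reference, a minimal sketch of how the download_images and ocr_image helpers shown above might be driven by a caller. The page URL and user agent are hypothetical, and it assumes the script's module-level imports (requests, BeautifulSoup, colorama) plus that ocr_image returns the extracted text:

import requests
from bs4 import BeautifulSoup

page_url = "https://example.com/report"  # hypothetical target page
resp = requests.get(page_url, headers={"User-Agent": "Mozilla/5.0"})
soup = BeautifulSoup(resp.text, "html.parser")

image_paths = download_images(soup, page_url)  # paths of the images saved locally
for path in image_paths:
    text = ocr_image(path)  # assumed to return the OCR'd text for that image
    print(text)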