-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathcivitai-image.py
89 lines (70 loc) · 3.24 KB
/
civitai-image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import requests
from PIL import Image, UnidentifiedImageError
from io import BytesIO
import os
import re
from tqdm import tqdm
# Configuration
api_key = "xxx"
headers = {"Authorization": f"Bearer {api_key}"}
initial_url = "https://civitai.com/api/v1/images"
download_path = input("Enter the path where you want to save the images: ")
min_width = int(input("Enter minimum width of the image: "))
min_height = int(input("Enter minimum height of the image: "))
max_images = int(input("Enter the maximum number of images you want to download: "))
nsfw_only = input("Do you want to download only NSFW images? (yes/no): ").strip().lower() == 'yes'
if nsfw_only:
initial_url += "?nsfw=true"
# Ensure directory exists
if not os.path.exists(download_path):
os.makedirs(download_path)
# Regex to clean up prompt text
tag_re = re.compile(r'<.*?>')
downloaded_urls = set()
if os.path.exists("downloaded_urls.log"):
with open("downloaded_urls.log", "r") as log_file:
downloaded_urls = set(log_file.readlines())
with open("downloaded_urls.log", "a") as log_file:
next_url = initial_url
total_saved = 0
while next_url and total_saved < max_images:
response = requests.get(next_url, headers=headers)
response_data = response.json()
if 'metadata' in response_data and 'nextPage' in response_data['metadata']:
next_url = response_data['metadata']['nextPage']
else:
next_url = None
filtered_images = [image for image in response_data['items'] if
image['stats']['heartCount'] > 10 and
image['meta'] and 'prompt' in image['meta'] and
image['width'] >= min_width and
image['height'] >= min_height and
image['url'] not in downloaded_urls]
for image in tqdm(filtered_images, desc="Saving images and metadata", unit="image"):
image_url = image['url']
if image_url in downloaded_urls:
continue
# Download image
try:
image_response = requests.get(image_url)
img = Image.open(BytesIO(image_response.content))
# Convert image to RGB if necessary
if img.mode in ['RGBA', 'P']:
img = img.convert('RGB')
image_id = image['id']
# Save image
img_filename = os.path.join(download_path, f"{image_id}.jpg")
img.save(img_filename)
# Save meta.prompt as a text file after cleaning
meta_prompt = tag_re.sub('', image['meta']['prompt'])
meta_filename = os.path.join(download_path, f"{image_id}.txt")
with open(meta_filename, "w", encoding='utf-8') as meta_file:
meta_file.write(meta_prompt)
log_file.write(image_url + "\n")
downloaded_urls.add(image_url)
total_saved += 1
if total_saved >= max_images:
break
except UnidentifiedImageError:
print(f"Failed to identify the image from URL: {image_url}")
print(f"Downloaded and saved {total_saved} images and metadata files.")